3

I'm trying to predict the next value based on the previous value for my cnn+lstm model, but I get the overall average for each prediction. My data consists of heatmap images of a neighborhood (features) for twenty weeks with the number of crimes (labels) for each week. I tried changing the number of epochs, changing the batch size, and changing the number of parameters in my model. Below is my model.

 # MODEL
        from keras.models import Sequential
        from keras.layers import Conv2D, MaxPooling2D
        from keras.layers import Activation, Dropout, Flatten, Dense
        
        
        def baseline_model():
            """Build the CNN+LSTM regressor described in the question.

            A stack of TimeDistributed Conv2D/MaxPooling2D layers extracts
            features from each image of the (length-1) input sequence, an
            LSTM processes the sequence, and a linear Dense head emits one
            scalar (the weekly crime count).

            NOTE(review): TimeDistributed and LSTM do not appear in the
            imports shown above -- presumably imported elsewhere; verify.
            """
            #create model
            model = Sequential()
            # Conv block 1: stride-2 3x3 conv downsamples the 256x256x3 frame.
            model.add(
                TimeDistributed(
                    Conv2D(16, (3, 3), strides=(2,2), data_format='channels_last',    activation='relu'),
                    input_shape=(1,256, 256,3)# looking back 1 image

               )
            )

            model.add(
                TimeDistributed(
                    MaxPooling2D(pool_size=(2, 2))
                )
            )

            # Conv block 2.
            model.add(
                TimeDistributed(
                    Conv2D(16, (3, 3), activation='relu'),
                )
            )

            model.add(
                TimeDistributed(
                    MaxPooling2D(pool_size=(2, 2))
                )
            )

            # Conv block 3.
            model.add(
                TimeDistributed(
                    Conv2D(32, (3, 3),activation='relu'),
                )
            )

            model.add(
                TimeDistributed(
                    MaxPooling2D(pool_size=(2, 2))
                )
            )

            # Flatten each frame's feature map into one vector per time step.
            model.add(
                TimeDistributed(
                    Flatten()
                )
            )

            # With a sequence length of 1 the LSTM only ever sees a single
            # step; return_sequences=True keeps the time axis for the head.
            model.add(
                    LSTM(4, return_sequences=True)
            )

            model.add(Dense(2, activation='relu'))
            model.add(Flatten())
            # Single linear output: the predicted crime count for the week.
            model.add(Dense((1), activation='linear'))


            #Compile model
            model.compile(loss='mean_squared_error', optimizer='adam')
            return model




    # evaluate model
estimator = KerasRegressor(build_fn=baseline_model, epochs=500, batch_size=1,verbose=0)
kfold = KFold(n_splits=10)
results = cross_val_score(estimator, X, y, cv=kfold)
print("Baseline: %.2f (%.2f) MSE" % (results.mean(), results.std()))
Baseline: -16.57 (19.04) MSE


estimator.fit(X, y)
prediction = estimator.predict(X)

print(y)
print(prediction)
[[ 4]
 [ 7]
 [ 7]
 [ 6]
 [13]
 [11]
 [10]
 [ 4]
 [11]
 [10]
 [ 6]
 [ 7]
 [ 2]
 [17]
 [14]
 [ 9]
 [ 8]
 [ 8]
 [ 4]
 [ 8]]
[8.324332 8.324332 8.324332 8.324332 8.324332 8.324332 8.324332 8.324332
 8.324332 8.324332 8.324332 8.324332 8.324332 8.324332 8.324332 8.324332
 8.324332 8.324332 8.324332 8.324332] 
  • I think it's coming from the way you use `TimeDistributed` layer. This applies the wrapped layer to every time step using the same weights. So the only way to achieve minimal MSE loss while being constrained to the same weights across all time steps is to aim for the overall average at every time step. – Yoan B. M.Sc Nov 23 '20 at 15:29
  • You can read this to help you modify your architecture accordingly : https://machinelearningmastery.com/timedistributed-layer-for-long-short-term-memory-networks-in-python/ – Yoan B. M.Sc Nov 23 '20 at 15:29

1 Answer

0

I looked over my model and made a few changes.

# MODEL
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Activation, Dropout, Flatten, Dense

def ReshapeLayer(x):
    """Drop the length-1 time axis and merge width with channels.

    Given a tensor shaped (batch, 1, H, W, C) coming out of the
    TimeDistributed conv stack (here (None, 1, 14, 14, 32)), return a
    tensor shaped (batch, H, W*C) so the LSTM can treat the H image
    rows as its time steps.
    """
    dims = x.shape
    rows, cols, channels = dims[2], dims[3], dims[4]
    return Reshape((rows, cols * channels))(x)
    

    
def baseline_model():
    """Build the revised CNN+LSTM regressor from the answer.

    Same TimeDistributed conv stack as the question's model, but the
    TimeDistributed(Flatten()) is replaced by Lambda(ReshapeLayer): the
    (batch, 1, 14, 14, 32) feature map is reshaped to (batch, 14, 448),
    so the LSTM processes 14 real steps (the feature-map rows) instead
    of a single flattened step. The LSTM is also widened from 4 to 20
    units.

    NOTE(review): TimeDistributed, LSTM, Lambda and Reshape are not in
    the imports shown in this snippet -- presumably imported elsewhere;
    verify before running.
    """
    #create model
    model = Sequential()
    # Conv block 1: stride-2 3x3 conv downsamples the 256x256x3 frame.
    model.add(
        TimeDistributed(
            Conv2D(16, (3, 3), strides=(2,2), data_format='channels_last', activation='relu')
            ,input_shape=(1,256, 256,3)
        )

    )

    model.add(
        TimeDistributed(
            MaxPooling2D(pool_size=(2, 2))
        )
    )

    # Conv block 2.
    model.add(
        TimeDistributed(
            Conv2D(16, (3, 3), activation='relu'),
        )
    )

    model.add(
        TimeDistributed(
            MaxPooling2D(pool_size=(2, 2))
        )
    )

    # Conv block 3.
    model.add(
        TimeDistributed(
            Conv2D(32, (3, 3),activation='relu'),
        )
    )

    model.add(
        TimeDistributed(
            MaxPooling2D(pool_size=(2, 2))
        )
    )

    # Key change: reshape (batch, 1, 14, 14, 32) -> (batch, 14, 448)
    # so the LSTM receives a genuine 14-step sequence.
    model.add(
        Lambda(ReshapeLayer)
    )

    # 20-unit LSTM over the 14 reshaped steps; return_sequences=True
    # keeps per-step outputs for the Dense head below.
    model.add(
            LSTM(20, activation='relu', return_sequences=True)
    )

    model.add(
            Dense((2), activation='relu')
    )
    model.add(
         Flatten()

    )

    # Single output; Dense default activation is linear (regression).
    model.add(

            Dense((1))

    )



    #Compile model
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model

Model Summary:

Model: "sequential_58"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
time_distributed_343 (TimeDi (None, 1, 127, 127, 16)   448       
_________________________________________________________________
time_distributed_344 (TimeDi (None, 1, 63, 63, 16)     0         
_________________________________________________________________
time_distributed_345 (TimeDi (None, 1, 61, 61, 16)     2320      
_________________________________________________________________
time_distributed_346 (TimeDi (None, 1, 30, 30, 16)     0         
_________________________________________________________________
time_distributed_347 (TimeDi (None, 1, 28, 28, 32)     4640      
_________________________________________________________________
time_distributed_348 (TimeDi (None, 1, 14, 14, 32)     0         
_________________________________________________________________
lambda_58 (Lambda)           (None, 14, 448)           0         
_________________________________________________________________
lstm_58 (LSTM)               (None, 14, 20)            37520     
_________________________________________________________________
dense_115 (Dense)            (None, 14, 2)             42        
_________________________________________________________________
flatten_58 (Flatten)         (None, 28)                0         
_________________________________________________________________
dense_116 (Dense)            (None, 1)                 29        
=================================================================
Total params: 44,999
Trainable params: 44,999
Non-trainable params: 0
_________________________________________________________________

I was able to get the result I was looking for when I tried to predict the crime count for Downtown Vancouver.

 # evaluate model
    estimator = KerasRegressor(build_fn=baseline_model,epochs=500,batch_size=1,verbose=0)
    kfold = KFold(n_splits=10)
    results = cross_val_score(estimator, X, y, cv=kfold)
    print("Baseline: %.2f (%.2f) MSE" % (results.mean(), results.std()))

Baseline: -3838.29 (10400.71) MSE

# evaluate
estimator.fit(X, y)
prediction = estimator.predict(X)

print(y)
print(prediction)
[[200]
 [189]
 [224]
 [170]
 [195]
 [197]
 [236]
 [156]
 [203]
 [218]
 [215]
 [240]
 [175]
 [223]
 [239]
 [222]
 [174]
 [207]
 [201]
 [200]]
[199.85223 188.7917  223.93802 169.9083  194.99187 196.86598 235.94765
 155.94873 202.9606  217.96512 214.86911 240.00726 175.0241  223.01225
 238.89543 221.8833  173.89732 206.95938 200.80322 199.88109]

However, whenever I tried to predict the crime rate for Fairview Vancouver I got the same value for each prediction.

# evaluate model
estimator = KerasRegressor(build_fn=baseline_model, epochs=500, batch_size=1,verbose=0)
kfold = KFold(n_splits=10)
results = cross_val_score(estimator, X, y, cv=kfold)
print("Baseline: %.2f (%.2f) MSE" % (results.mean(), results.std()))

Baseline: -782.18 (735.71) MSE

# evaluate
estimator.fit(X, y)
prediction = estimator.predict(X)

print(y)
print(prediction)

[[39]
 [40]
 [36]
 [29]
 [44]
 [49]
 [35]
 [29]
 [49]
 [55]
 [40]
 [57]
 [38]
 [39]
 [38]
 [37]
 [24]
 [53]
 [32]
 [43]]
[9.494502 9.494502 9.494502 9.494502 9.494502 9.494502 9.494502 9.494502
 9.494502 9.494502 9.494502 9.494502 9.494502 9.494502 9.494502 9.494502
 9.494502 9.494502 9.494502 9.494502]

I don't know why it would give me the same value for each prediction when the prediction for Downtown gave me varying values, even though I'm using the same model for both predictions. Could whether or not my prediction values are all the same depend on the MSE?