1. Vanilla LSTM
from random import randint from numpy import array from numpy import argmax
Sequence Generator
def generate_sequence(length, n_features):
    """Return a list of ``length`` random integers.

    Each value is drawn with ``randint`` from ``0`` to ``n_features - 1``
    inclusive, so every value is a valid index into a one-hot vector of
    width ``n_features``.
    """
    values = []
    for _ in range(length):
        values.append(randint(0, n_features - 1))
    return values
Generate random sequence
# Draw a 25-step sequence with values in 0..99 and display it.
sequence = generate_sequence(25, 100)
print(sequence)
[81, 54, 44, 82, 86, 43, 46, 20, 85, 71, 55, 58, 26, 78, 66, 70, 38, 55, 91, 25, 51, 32, 89, 9, 83]
One Hot Encoder
One hot encode sequence
def one_hot_encode(sequence, n_features):
    """One-hot encode a sequence of integer class indices.

    Args:
        sequence: Iterable of integers, each in ``range(n_features)``.
        n_features: Width of every one-hot vector.

    Returns:
        Integer NumPy array of shape ``(len(sequence), n_features)`` in which
        row ``i`` holds a single ``1`` at column ``sequence[i]``. An empty
        sequence yields an array of shape ``(0, n_features)``.

    Raises:
        IndexError: If a value lies outside ``range(n_features)``.
    """
    encoding = []
    for value in sequence:
        # Reject negatives explicitly: plain list indexing would wrap them
        # around and silently mark the wrong column.
        if not 0 <= value < n_features:
            raise IndexError(
                'value %r is outside the range [0, %r)' % (value, n_features)
            )
        vector = [0] * n_features
        vector[value] = 1
        encoding.append(vector)
    # The explicit dtype and reshape keep the result 2-D and integer-typed
    # even when the sequence is empty.
    return array(encoding, dtype=int).reshape((len(encoding), n_features))
# Encode the sequence generated above (values 0..99) and display the matrix.
encoded = one_hot_encode(sequence, 100)
print(encoded)
[[0 0 0 ..., 0 0 0] [0 0 0 ..., 0 0 0] [0 0 0 ..., 0 0 0] ..., [0 0 0 ..., 0 0 0] [0 0 0 ..., 0 0 0] [0 0 0 ..., 0 0 0]]
One Hot Decoder
Decode a one-hot encoded sequence
def one_hot_decode(encoded_seq):
    """Map each vector in ``encoded_seq`` back to an integer index.

    For every vector the position of its largest entry (as reported by
    ``argmax``) is collected; the indices are returned as a list in the
    same order as the input vectors.
    """
    indices = []
    for row in encoded_seq:
        indices.append(argmax(row))
    return indices
One hot decode
# Round-trip check: decoding the encoded matrix should print the original sequence.
decoded = one_hot_decode(encoded)
print(decoded)
[81, 54, 44, 82, 86, 43, 46, 20, 85, 71, 55, 58, 26, 78, 66, 70, 38, 55, 91, 25, 51, 32, 89, 9, 83]
Generate Sequence Examples
# Small configuration so the printed arrays stay readable.
length = 5
n_features = 10

# Generate a random sequence.
sequence = generate_sequence(length, n_features)
# One-hot encode it into a (length, n_features) matrix.
encoded = one_hot_encode(sequence, n_features)
# Reshape to 3D: (1, length, n_features).
X = encoded.reshape((1, length, n_features))
print(
    'Sequence is: {},\n encoded is :{},\n X has shape: {},\n X is: {}'.format(
        sequence, encoded, X.shape, X
    )
)
Sequence is: [2, 4, 2, 5, 7], encoded is :[[0 0 1 0 0 0 0 0 0 0] [0 0 0 0 1 0 0 0 0 0] [0 0 1 0 0 0 0 0 0 0] [0 0 0 0 0 1 0 0 0 0] [0 0 0 0 0 0 0 1 0 0]], X has shape: (1, 5, 10), X is: [[[0 0 1 0 0 0 0 0 0 0] [0 0 0 0 1 0 0 0 0 0] [0 0 1 0 0 0 0 0 0 0] [0 0 0 0 0 1 0 0 0 0] [0 0 0 0 0 0 0 1 0 0]]]
# Use the one-hot row at position 2 as the target, shaped (1, n_features).
y = encoded[2].reshape(1, n_features)
# Bare expression: shows the value when run as a notebook cell.
y
array([[0, 0, 1, 0, 0, 0, 0, 0, 0, 0]])
Generate one example for an LSTM
def generate_example(length, n_features, out_index):
    """Create one random (input, target) pair.

    A random sequence is generated and one-hot encoded; the full encoding
    becomes the input and the encoded step at ``out_index`` becomes the
    target.

    Returns:
        Tuple ``(X, y)`` where ``X`` is the encoding reshaped to
        ``(1, length, n_features)`` and ``y`` is the row at ``out_index``
        reshaped to ``(1, n_features)``.
    """
    one_hot = one_hot_encode(generate_sequence(length, n_features), n_features)
    inputs = one_hot.reshape((1, length, n_features))
    target = one_hot[out_index].reshape(1, n_features)
    return inputs, target
Vanilla LSTM
from keras.models import Sequential from keras.layers import LSTM from keras.layers import Dense
- Define model
# Problem configuration: sequence length, number of distinct values, and the
# position in the sequence that serves as the target.
length = 50
n_features = 100
out_index = 2

# A single LSTM layer with 25 units followed by a softmax over the n_features classes.
model = Sequential()
model.add(LSTM(25, input_shape=(length, n_features)))
model.add(Dense(n_features, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])
# summary() prints the table itself and returns None, so wrapping it in
# print() only adds a stray "None" line to the output.
model.summary()
_________________________________________________________________ Layer (type) Output Shape Param # ================================================================= lstm_3 (LSTM) (None, 25) 12600 _________________________________________________________________ dense_3 (Dense) (None, 100) 2600 ================================================================= Total params: 15,200 Trainable params: 15,200 Non-trainable params: 0 _________________________________________________________________ None
- Fit model
# Train on 10 freshly generated examples, one single-sample fit() call each.
for i in range(10):
    X, y = generate_example(length, n_features, out_index)
    model.fit(X, y, epochs=1, verbose=2)
Epoch 1/1 1s - loss: 4.6111 - acc: 0.0000e+00 Epoch 1/1 0s - loss: 4.5636 - acc: 0.0000e+00 Epoch 1/1 0s - loss: 4.6073 - acc: 0.0000e+00 Epoch 1/1 0s - loss: 4.5968 - acc: 0.0000e+00 Epoch 1/1 0s - loss: 4.5582 - acc: 0.0000e+00 Epoch 1/1 0s - loss: 4.5816 - acc: 0.0000e+00 Epoch 1/1 0s - loss: 4.6481 - acc: 0.0000e+00 Epoch 1/1 0s - loss: 4.5947 - acc: 0.0000e+00 Epoch 1/1 0s - loss: 4.6196 - acc: 0.0000e+00 Epoch 1/1 0s - loss: 4.5835 - acc: 0.0000e+00
- Evaluate model
# Estimate accuracy on freshly generated examples. The trial count is named
# once so the loop bound and the divisor cannot drift apart.
n_trials = 100
correct = 0
for i in range(n_trials):
    X, y = generate_example(length, n_features, out_index)
    yhat = model.predict(X)
    # A trial counts as correct when the predicted class matches the target class.
    if one_hot_decode(yhat) == one_hot_decode(y):
        correct += 1
# Multiply by 100.0 first so the division is done in floating point even
# where "/" on two ints truncates (Python 2).
print('Accuracy: %f' % (100.0 * correct / n_trials))
Accuracy: 0.000000
- Prediction on new data
# Run the model on one new example and show input, target, and prediction
# as decoded integers.
X, y = generate_example(length, n_features, out_index)
yhat = model.predict(X)
print('Sequence: %s' % [one_hot_decode(x) for x in X])
print('Expected: %s' % one_hot_decode(y))
print('Predicted: %s' % one_hot_decode(yhat))
Sequence: [[60, 56, 22, 5, 67, 76, 43, 14, 35, 72, 11, 48, 30, 44, 12, 55, 41, 79, 17, 70, 20, 39, 70, 1, 48, 94, 12, 56, 46, 4, 92, 77, 50, 12, 91, 50, 78, 59, 47, 83, 75, 19, 15, 57, 78, 34, 88, 75, 3, 19]] Expected: [22] Predicted: [30]