1. Vanilla LSTM

from random import randint
from numpy import array
from numpy import argmax

Sequence Generator

def generate_sequence(length, n_features):
    """Return a list of `length` random integers in [0, n_features - 1].

    Each value is drawn independently with `randint`, so both bounds are
    inclusive and repeats are possible.
    """
    values = []
    for _ in range(length):
        values.append(randint(0, n_features - 1))
    return values

Generate random sequence

# Draw 25 random integers in the range [0, 99] and display them.
sequence = generate_sequence(25, 100)
print(sequence)
[81, 54, 44, 82, 86, 43, 46, 20, 85, 71, 55, 58, 26, 78, 66, 70, 38, 55, 91, 25, 51, 32, 89, 9, 83]

One Hot Encoder

One hot encode sequence

def one_hot_encode(sequence, n_features):
    """One-hot encode a sequence of integer values.

    Every value becomes a row of `n_features` zeros with a single 1 at the
    position given by the value. The rows are returned stacked in a NumPy
    array, one row per element of `sequence`.
    """
    rows = []
    for index in sequence:
        row = [0] * n_features
        # Plain list indexing: an index outside the row raises IndexError.
        row[index] = 1
        rows.append(row)
    return array(rows)
# Encode the current `sequence` with 100 possible values per element and
# display the resulting array.
encoded = one_hot_encode(sequence, 100)
print(encoded)
[[0 0 0 ..., 0 0 0]
 [0 0 0 ..., 0 0 0]
 [0 0 0 ..., 0 0 0]
 ..., 
 [0 0 0 ..., 0 0 0]
 [0 0 0 ..., 0 0 0]
 [0 0 0 ..., 0 0 0]]

One Hot Decoder

Decode a one-hot encoded sequence

def one_hot_decode(encoded_seq):
    """Turn a sequence of vectors back into a list of integer values.

    Each vector is mapped to the index of its largest entry (`argmax`), so
    this works for exact one-hot rows as well as for rows of scores.
    """
    decoded = []
    for vector in encoded_seq:
        decoded.append(argmax(vector))
    return decoded

One hot decode

# Round trip: decode the current `encoded` array back into integer values
# and display them.
decoded = one_hot_decode(encoded)
print(decoded)
[81, 54, 44, 82, 86, 43, 46, 20, 85, 71, 55, 58, 26, 78, 66, 70, 38, 55, 91, 25, 51, 32, 89, 9, 83]

Generate Sequence Examples

# Small configuration so the printed arrays stay readable.
length = 5
n_features = 10

# Draw a random sequence and one-hot encode it.
sequence = generate_sequence(length, n_features)
encoded = one_hot_encode(sequence, n_features)

# Add a leading axis of size 1 so the array is 3D: (1, length, n_features).
X = encoded.reshape((1, length, n_features))
print(
    'Sequence is: {},\n encoded is :{},\n X has shape: {},\n X is: {}'.format(
        sequence, encoded, X.shape, X
    )
)
Sequence is: [2, 4, 2, 5, 7],
 encoded is :[[0 0 1 0 0 0 0 0 0 0]
 [0 0 0 0 1 0 0 0 0 0]
 [0 0 1 0 0 0 0 0 0 0]
 [0 0 0 0 0 1 0 0 0 0]
 [0 0 0 0 0 0 0 1 0 0]],
 X has shape: (1, 5, 10),
 X is: [[[0 0 1 0 0 0 0 0 0 0]
  [0 0 0 0 1 0 0 0 0 0]
  [0 0 1 0 0 0 0 0 0 0]
  [0 0 0 0 0 1 0 0 0 0]
  [0 0 0 0 0 0 0 1 0 0]]]
# Use the one-hot row at index 2 (the third element of the sequence) as the
# target, reshaped to a single row of width n_features.
y = encoded[2].reshape(1, n_features)
# Bare expression: only an interactive session / notebook cell displays it.
y
array([[0, 0, 1, 0, 0, 0, 0, 0, 0, 0]])

Generate one example for an LSTM

def generate_example(length, n_features, out_index):
    """Build one random (input, target) pair.

    A random sequence of `length` integers is generated and one-hot encoded
    with `n_features` columns.

    Returns:
        X: the encoded sequence reshaped to (1, length, n_features).
        y: the encoded row at position `out_index`, reshaped to
           (1, n_features).
    """
    one_hot = one_hot_encode(generate_sequence(length, n_features), n_features)

    # Leading axis of size 1: a batch containing this single sequence.
    features = one_hot.reshape((1, length, n_features))
    target = one_hot[out_index].reshape(1, n_features)
    return features, target

Vanilla LSTM

from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Dense
  • Define model
# Problem configuration: each example is a sequence of `length` one-hot
# vectors of width `n_features`; the target is the element at `out_index`.
length = 50
n_features = 100
out_index = 2

# One LSTM layer (25 units) reads the whole sequence; the dense softmax layer
# then outputs one probability per possible value.
model = Sequential()
model.add(LSTM(25, input_shape=(length, n_features)))
model.add(Dense(n_features, activation='softmax'))

# Targets are one-hot vectors, hence categorical cross-entropy.
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['acc'])

# summary() prints the table itself and returns None; wrapping it in print()
# would emit a stray "None" line after the table.
model.summary()
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
lstm_3 (LSTM)                (None, 25)                12600     
_________________________________________________________________
dense_3 (Dense)              (None, 100)               2600      
=================================================================
Total params: 15,200
Trainable params: 15,200
Non-trainable params: 0
_________________________________________________________________
None
  • Fit model
# Train on 10 freshly generated random examples. Each fit() call receives a
# single (X, y) pair and runs one epoch on it, which is why the log shows
# "Epoch 1/1" once per iteration.
for i in range(10):
    X, y = generate_example(length, n_features, out_index)
    model.fit(X, y, epochs=1, verbose=2)
Epoch 1/1
1s - loss: 4.6111 - acc: 0.0000e+00
Epoch 1/1
0s - loss: 4.5636 - acc: 0.0000e+00
Epoch 1/1
0s - loss: 4.6073 - acc: 0.0000e+00
Epoch 1/1
0s - loss: 4.5968 - acc: 0.0000e+00
Epoch 1/1
0s - loss: 4.5582 - acc: 0.0000e+00
Epoch 1/1
0s - loss: 4.5816 - acc: 0.0000e+00
Epoch 1/1
0s - loss: 4.6481 - acc: 0.0000e+00
Epoch 1/1
0s - loss: 4.5947 - acc: 0.0000e+00
Epoch 1/1
0s - loss: 4.6196 - acc: 0.0000e+00
Epoch 1/1
0s - loss: 4.5835 - acc: 0.0000e+00
  • Evaluate model
# Estimate accuracy on freshly generated random examples: a prediction counts
# as correct when its most probable value equals the expected value.
n_trials = 100
correct = 0
for i in range(n_trials):
    X, y = generate_example(length, n_features, out_index)
    yhat = model.predict(X)
    if one_hot_decode(yhat) == one_hot_decode(y):
        correct += 1
# float() forces true division; under Python 2 `correct/100` would be integer
# division and report 0% unless every single trial was correct.
print('Accuracy: %f' % ((correct / float(n_trials)) * 100.0))
Accuracy: 0.000000
  • Prediction on new data
# Generate one more random example and compare the model's prediction with
# the expected value.
X, y = generate_example(length, n_features, out_index)
yhat = model.predict(X)

# X has a leading axis of size 1, so iterating over it yields the single
# encoded sequence, which is decoded back to integers for display.
print('Sequence: %s' % ([one_hot_decode(sample) for sample in X],))
print('Expected: %s' % (one_hot_decode(y),))
print('Predicted: %s' % (one_hot_decode(yhat),))
Sequence: [[60, 56, 22, 5, 67, 76, 43, 14, 35, 72, 11, 48, 30, 44, 12, 55, 41, 79, 17, 70, 20, 39, 70, 1, 48, 94, 12, 56, 46, 4, 92, 77, 50, 12, 91, 50, 78, 59, 47, 83, 75, 19, 15, 57, 78, 34, 88, 75, 3, 19]]
Expected: [22]
Predicted: [30]