How do I create a variable-length input LSTM in Keras?

python-3.x


I am not clear about the embedding procedure, but here is a way to implement a variable-length input LSTM: just do not specify the timespan dimension when building the LSTM.

import numpy as np
import keras.backend as K
from keras.layers import LSTM, Input

I = Input(shape=(None, 200))  # unknown timespan, fixed feature size
lstm = LSTM(20)
f = K.function(inputs=[I], outputs=[lstm(I)])

data1 = np.random.random(size=(1, 100, 200))  # batch_size = 1, timespan = 100
print(f([data1])[0].shape)
# (1, 20)

data2 = np.random.random(size=(1, 314, 200))  # batch_size = 1, timespan = 314
print(f([data2])[0].shape)
# (1, 20)
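As a side note, here is a minimal sketch (layer sizes and random data are placeholders, not from the code above) showing that the same unspecified timestep axis also works through an ordinary Model and model.predict, so a backend function is not strictly required:

import numpy as np
from keras.layers import Input, LSTM
from keras.models import Model

inp = Input(shape=(None, 200))      # timespan left unspecified, 200 features
model = Model(inp, LSTM(20)(inp))   # plain model instead of a backend function

# Any timespan works at predict time, as long as each batch is internally uniform.
print(model.predict(np.random.random((1, 100, 200))).shape)   # (1, 20)
print(model.predict(np.random.random((1, 314, 200))).shape)   # (1, 20)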


The trick to training and classifying variable-length sequences is to train with masking and to classify with a stateful network. Here's an example I made that classifies whether a variable-length sequence starts with zero or not.

import numpy as np
np.random.seed(1)
import tensorflow as tf
tf.set_random_seed(1)
from keras import models
from keras.layers import Dense, Masking, LSTM
import matplotlib.pyplot as plt


def stateful_model():
    # One-timestep-at-a-time model used for classification; the weights are
    # copied in from the masked model trained below.
    hidden_units = 256

    model = models.Sequential()
    model.add(LSTM(hidden_units, batch_input_shape=(1, 1, 1), return_sequences=False, stateful=True))
    model.add(Dense(1, activation='relu', name='output'))
    model.compile(loss='binary_crossentropy', optimizer='rmsprop')
    return model


def train_rnn(x_train, y_train, max_len, mask):
    # Non-stateful model trained on sequences padded to max_len; the Masking
    # layer makes the LSTM skip the padded timesteps.
    epochs = 10
    batch_size = 200
    vec_dims = 1
    hidden_units = 256
    in_shape = (max_len, vec_dims)

    model = models.Sequential()
    model.add(Masking(mask, name="in_layer", input_shape=in_shape))
    model.add(LSTM(hidden_units, return_sequences=False))
    model.add(Dense(1, activation='relu', name='output'))
    model.compile(loss='binary_crossentropy', optimizer='rmsprop')

    model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs,
              validation_split=0.05)
    return model


def gen_train_sig_cls_pair(t_stops, num_examples, mask):
    # Training data: sequences padded to the longest length with the mask value.
    x = []
    y = []
    max_t = int(np.max(t_stops))
    for t_stop in t_stops:
        one_indices = np.random.choice(a=num_examples, size=num_examples // 2, replace=False)

        sig = np.zeros((num_examples, max_t), dtype=np.int8)
        sig[one_indices, 0] = 1
        sig[:, t_stop:] = mask
        x.append(sig)

        cls = np.zeros(num_examples, dtype=np.bool)
        cls[one_indices] = 1
        y.append(cls)

    return np.concatenate(x, axis=0), np.concatenate(y, axis=0)


def gen_test_sig_cls_pair(t_stops, num_examples):
    # Test data: unpadded sequences kept at their true (variable) lengths.
    x = []
    y = []
    for t_stop in t_stops:
        one_indices = np.random.choice(a=num_examples, size=num_examples // 2, replace=False)

        sig = np.zeros((num_examples, t_stop), dtype=np.bool)
        sig[one_indices, 0] = 1
        x.extend(list(sig))

        cls = np.zeros((num_examples, t_stop), dtype=np.bool)
        cls[one_indices] = 1
        y.extend(list(cls))

    return x, y


if __name__ == '__main__':
    noise_mag = 0.01
    mask_val = -10
    signal_lengths = (10, 15, 20)

    x_in, y_in = gen_train_sig_cls_pair(signal_lengths, 10, mask_val)
    mod = train_rnn(x_in[:, :, None], y_in, int(np.max(signal_lengths)), mask_val)

    testing_dat, expected = gen_test_sig_cls_pair(signal_lengths, 3)

    # Copy the trained weights into the stateful model and classify each test
    # sequence one timestep at a time, resetting the state between sequences.
    state_mod = stateful_model()
    state_mod.set_weights(mod.get_weights())

    res = []
    for s_i in range(len(testing_dat)):
        seq_in = list(testing_dat[s_i])
        seq_len = len(seq_in)

        for t_i in range(seq_len):
            res.extend(state_mod.predict(np.array([[[seq_in[t_i]]]])))

        state_mod.reset_states()

    fig, axes = plt.subplots(2)
    axes[0].plot(np.concatenate(testing_dat), label="input")

    axes[1].plot(res, "ro", label="result", alpha=0.2)
    axes[1].plot(np.concatenate(expected, axis=0), "bo", label="expected", alpha=0.2)
    axes[1].legend(bbox_to_anchor=(1.1, 1))

    plt.show()
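For completeness, here is a minimal sketch of the padding half of that recipe using keras.preprocessing.sequence.pad_sequences instead of hand-rolled padding; the mask value of -10 and the toy data are placeholders, not part of the example above:

import numpy as np
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Masking, LSTM, Dense

mask_val = -10
sequences = [[1, 0, 0, 0], [0, 0, 1, 0, 0, 1], [1, 1, 0]]   # variable lengths
labels = np.array([1, 0, 1])

# Right-pad every sequence to the longest one with the mask value, then let
# the Masking layer hide the padded timesteps from the LSTM.
x = pad_sequences(sequences, padding='post', value=mask_val, dtype='float32')
x = x[:, :, None]   # (batch, max_len, 1)

model = Sequential()
model.add(Masking(mask_value=mask_val, input_shape=(x.shape[1], 1)))
model.add(LSTM(16))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='rmsprop')
model.fit(x, labels, epochs=2, batch_size=3)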


I'm not sure how applicable recurrent networks are for your sequences, i.e. how strongly each element depends on its preceding sequence as opposed to other factors. That being said (which doesn't help you one bit, of course), if you don't want to pad your input with some bad value, a stateful model that processes a single timestep at a time is the only alternative for variable-length sequences IMHO. If you don't mind taking an alternative approach to encoding:

import numpy as np
import keras.models as kem
import keras.layers as kel
import keras.callbacks as kec
import sklearn.preprocessing as skprep

X_train, max_features = {'Sequence': [[1, 2, 4, 5, 8, 10, 16], [1, 2, 1, 5, 5, 1, 11, 16, 7]]}, 16

num_mem_units = 64
size_batch = 1
num_timesteps = 1
num_features = 1
num_targets = 1
num_epochs = 1500

model = kem.Sequential()
model.add(kel.LSTM(num_mem_units, stateful=True,
                   batch_input_shape=(size_batch, num_timesteps, num_features),
                   return_sequences=True))
model.add(kel.Dense(num_targets, activation='sigmoid'))
model.summary()
model.compile(loss='binary_crossentropy', optimizer='adam')

range_act = (0, 1)  # sigmoid
range_features = np.array([0, max_features]).reshape(-1, 1)
normalizer = skprep.MinMaxScaler(feature_range=range_act)
normalizer.fit(range_features)

reset_state = kec.LambdaCallback(on_epoch_end=lambda *_: model.reset_states())

# training
for seq in X_train['Sequence']:
    X = seq[:-1]
    y = seq[1:]  # predict next element
    X_norm = normalizer.transform(np.array(X).reshape(-1, 1)).reshape(-1, num_timesteps, num_features)
    y_norm = normalizer.transform(np.array(y).reshape(-1, 1)).reshape(-1, num_timesteps, num_targets)
    model.fit(X_norm, y_norm, epochs=num_epochs, batch_size=size_batch, shuffle=False,
              callbacks=[reset_state])

# prediction
for seq in X_train['Sequence']:
    model.reset_states()
    for istep in range(len(seq) - 1):  # feed the input up to, but not including, the last element
        val = seq[istep]
        X = np.array([val]).reshape(-1, 1)
        X_norm = normalizer.transform(X).reshape(-1, num_timesteps, num_features)
        y_norm = model.predict(X_norm)
    yhat = int(normalizer.inverse_transform(y_norm[0])[0, 0])
    y = seq[-1]  # last element is the expected value
    put = '{0} predicts {1:d}, expecting {2:d}'.format(', '.join(str(val) for val in seq[:-1]), yhat, y)
    print(put)

which produces something like:

1, 2, 4, 5, 8, 10 predicts 11, expecting 16
1, 2, 1, 5, 5, 1, 11, 16 predicts 7, expecting 7

with ridiculous loss, however.
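If training one timestep at a time is too slow, a minimal sketch of another option (the data and binary targets below are made-up placeholders, not from the example above) is to bucket the sequences by length and train each bucket as a uniform batch, since the timestep axis can stay unspecified:

import numpy as np
from collections import defaultdict
from keras.models import Sequential
from keras.layers import LSTM, Dense

# Toy data: variable-length sequences with made-up binary targets.
sequences = [[1, 2, 4, 5, 8, 10, 16], [1, 2, 1, 5, 5, 1, 11, 16, 7], [2, 4, 8]]
targets = [1, 0, 1]

# Group sequences by length so every batch is internally uniform.
buckets = defaultdict(list)
for seq, tgt in zip(sequences, targets):
    buckets[len(seq)].append((seq, tgt))

model = Sequential()
model.add(LSTM(32, input_shape=(None, 1)))   # timestep axis left unspecified
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam')

for length, pairs in buckets.items():
    x = np.array([s for s, _ in pairs], dtype='float32')[:, :, None]   # (n, length, 1)
    y = np.array([t for _, t in pairs], dtype='float32')
    model.train_on_batch(x, y)   # each call may use a different length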