TensorFlow Federated - Adapting existing keras model - python-3.x

I'm having trouble adapting an existing Keras model to work with TenforFlow Federated.
The existing model is a 1D convolutional autoencoder (details shown below)
Existing Model:
input_window = Input(shape=(window_length,1))
x = Conv1D(16, 3, activation="relu", padding="same")(input_window)
x = MaxPooling1D(2, padding="same")(x)
x = Conv1D(1, 3, activation="relu", padding="same")(x)
encoded = MaxPooling1D(2, padding="same")(x)
encoder = Model(input_window, encoded)
x = Conv1D(1, 3, activation="relu", padding="same")(encoded)
x = UpSampling1D(2)(x)
x = Conv1D(16, 1, activation='relu')(x)
x = UpSampling1D(2)(x)
decoded = Conv1D(1, 3, activation='sigmoid', padding='same')(x)
autoencoder = Model(input_window, decoded)
Training data is passed as a numpy.ndarray of shape (102, 48, 1).
Conceptually, this represents 102 days worth of data, each containg 48 values. I can provide an example of this if it would assist in answering.
My attempt to convert the model is shown below.
Converted Model:
def create_compiled_keras_model():
input_window = tf.keras.layers.Input(shape=(window_length,1))
x = tf.keras.layers.Conv1D(16, 3, activation="relu", padding="same")(input_window)
x = tf.keras.layers.MaxPooling1D(2, padding="same")(x)
x = tf.keras.layers.Conv1D(1, 3, activation="relu", padding="same")(x)
encoded = tf.keras.layers.MaxPooling1D(2, padding="same")(x)
encoder = tf.keras.Model(input_window, encoded)
x = tf.keras.layers.Conv1D(1, 3, activation="relu", padding="same")(encoded)
x = tf.keras.layers.UpSampling1D(2)(x)
x = tf.keras.layers.Conv1D(16, 1, activation='relu')(x)
x = tf.keras.layers.UpSampling1D(2)(x)
decoded = tf.keras.layers.Conv1D(1, 3, activation='sigmoid', padding='same')(x)
autoencoder = tf.keras.Model(input_window, decoded)
autoencoder.compile(optimizer='adam', loss='MSE')
return autoencoder
sample_batch = train // numpy.ndarray of shape (102, 48, 1)
def model_fn():
keras_model = create_compiled_keras_model()
return tff.learning.from_compiled_keras_model(keras_model, train)
This produces the error:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-397-9bed171c79be> in <module>
----> 1 model = model_fn()
<ipython-input-396-13bc1955a7f2> in model_fn()
1 def model_fn():
2 keras_model = create_compiled_keras_model()
----> 3 return tff.learning.from_compiled_keras_model(keras_model, train)
~/miniconda3/lib/python3.6/site-packages/tensorflow_federated/python/learning/model_utils.py in from_compiled_keras_model(keras_model, dummy_batch)
190 raise ValueError('`keras_model` must be compiled. Use from_keras_model() '
191 'instead.')
--> 192 return enhance(_TrainableKerasModel(keras_model, dummy_batch))
193
194
~/miniconda3/lib/python3.6/site-packages/tensorflow_federated/python/learning/model_utils.py in __init__(self, inner_model, dummy_batch)
434 # until the model has been called on input. The work-around is to call
435 # Model.test_on_batch() once before asking for metrics.
--> 436 inner_model.test_on_batch(**dummy_batch)
437 # This must occur after test_on_batch()
438 if len(inner_model.loss_functions) != 1:
TypeError: test_on_batch() argument after ** must be a mapping, not numpy.ndarray
So far I have been unable to resolve this.
Is this an issue relating to my model not being compiled correctly, or due to the way I'm passing data?
Any help in resolving this would be greatly appreciated, thanks!

The sample batch should be something that can be passed to batch_input argument of tff.learning.Model.forward_pass.
For wrapped Keras models, this must be a dict with keys matching the arguments to tf.keras.models.Model.test_on_batch.
For this case, I think you may be able to simply wrap the sample batch in a dict with a single key of x:
numpy_sample_batch = train // numpy.ndarray
sample_batch = {'x': numpy_sample_batch}

Related

Coremltools: errors to get simplest convolutional model working

Suppose I create simplest model in Keras:
from keras.layers import *
from keras import Input, Model
import coremltools
def MyModel(inputs_shape=(None,None,3), channels=64):
inpt = Input(shape=inputs_shape)
# channels
skip = Conv2D(channels, (3, 3), strides=1, activation=None, padding='same', name='conv_in')(inpt)
out = Conv2D(3, (3, 3), strides=1, padding='same', activation='tanh',name='out')(skip)
return Model(inputs=inpt, outputs=out)
model = MyModel()
coreml_model = coremltools.converters.keras.convert(model,
input_names=["inp1"],
output_names=["out1"],
image_scale=1.0,
model_precision='float32',
use_float_arraytype=True,
input_name_shape_dict={'inp1': [None, 384, 384, 3]}
)
spec = coreml_model._spec
print(spec.description.input[0])
print(spec.description.input[0].type.multiArrayType.shape)
print(spec.description.output[0])
coremltools.utils.save_spec(spec, "test.mlmodel")
The output is:
2 : out, <keras.layers.convolutional.Conv2D object at 0x7f08ca491470>
3 : out__activation__, <keras.layers.core.Activation object at 0x7f08ca4b0b70>
name: "inp1"
type {
multiArrayType {
shape: 3
shape: 384
shape: 384
dataType: FLOAT32
}
}
[3, 384, 384]
name: "out1"
type {
multiArrayType {
shape: 3
dataType: FLOAT32
}
}
So the output shape is 3, which is incorrect. And when I try to get rid from input_name_shape_dict I get:
Please provide a finite height (H), width (W) & channel value (C) using input_name_shape_dict arg with key = 'inp1' and value = [None, H, W, C]
Converted .mlmodel can be modified to have flexible input shape using coremltools.models.neural_network.flexible_shape_utils
So it wants NHWC.
Attempt of inference yields:
Layer 'conv_in' of type 'Convolution' has input rank 3 but expects rank at least 4
When I attempt to add extra dimension to input:
spec.description.input[0].type.multiArrayType.shape.extend([1, 3, 384, 384])
del spec.description.input[0].type.multiArrayType.shape[0]
del spec.description.input[0].type.multiArrayType.shape[0]
del spec.description.input[0].type.multiArrayType.shape[0]
[name: "inp1"
type {
multiArrayType {
shape: 1
shape: 3
shape: 384
shape: 384
dataType: FLOAT32
}
}
]
I get for inference:
Shape (1 x 384 x 384 x 3) was not in enumerated set of allowed shapes
Following this advice and making input shape (1,1,384,384,3) dos not help.
How can I make it working and producing correct output?
Inference:
From PIL import Image
model_cml = coremltools.models.MLModel('my.mlmodel')
# load image
img = np.array(Image.open('patch4.png').convert('RGB'))[np.newaxis,...]/127.5 - 1
# Make predictions
predictions = model_cml.predict({'inp1':img})
# save result
res = predictions['out1']
res = np.clip((res[0]+1)*127.5,0,255).astype(np.uint8)
Image.fromarray(res).save('out32.png')
UPDATE:
I am able to run this model with inputs (3,1,384,384), the result produces is (1,3,3,384,384) which does not make any sense for me.
UPDATE 2:
setting fixed shape in Keras
def MyModel(inputs_shape=(384,384,3), channels=64):
inpt = Input(shape=inputs_shape)
fixed output shape problem, but I still cannot run the model (Layer 'conv_in' of type 'Convolution' has input rank 3 but expects rank at least 4)
UPDATE:
The following works to get rid of input and conv_in shapes mismatch.
1). Downgrade to coremltools==3.0. Version 3.3 (model version 4) seems broken.
2.) Use fixed shape in keras model, no input_shape_dist and variable shape for coreml model
from keras.layers import *
from keras import Input, Model
import coremltools
def MyModel(inputs_shape=(384,384,3), channels=64):
inpt = Input(shape=inputs_shape)
# channels
skip = Conv2D(channels, (3, 3), strides=1, activation=None, padding='same', name='conv_in')(inpt)
out = Conv2D(3, (3, 3), strides=1, padding='same', activation='tanh',name='out')(skip)
return Model(inputs=inpt, outputs=out)
model = MyModel()
model.save('test.model')
print(model.summary())
'''
# v.3.3
coreml_model = coremltools.converters.keras.convert(model,
input_names=["image"],
output_names="out1",
image_scale=1.0,
model_precision='float32',
use_float_arraytype=True,
input_name_shape_dict={'inp1': [None, 384, 384, 3]}
)
'''
coreml_model = coremltools.converters.keras.convert(model,
input_names=["image"],
output_names="out1",
image_scale=1.0,
model_precision='float32',
)
spec = coreml_model._spec
from coremltools.models.neural_network import flexible_shape_utils
shape_range = flexible_shape_utils.NeuralNetworkMultiArrayShapeRange()
shape_range.add_channel_range((3,3))
shape_range.add_height_range((64, 384))
shape_range.add_width_range((64, 384))
flexible_shape_utils.update_multiarray_shape_range(spec, feature_name='image', shape_range=shape_range)
print(spec.description.input)
print(spec.description.input[0].type.multiArrayType.shape)
print(spec.description.output)
coremltools.utils.save_spec(spec, "my.mlmodel")
In the inference script, feed array of the shape (1,1,3,384,384):
img = np.zeros((1,1,3,384,384))
# Make predictions
predictions = model_cml.predict({'inp1':img})
res = predictions['out1'] # (3, 384,384)
You can ignore what the mlmodel file has in the output shape if it is incorrect. This is more of a metadata issue, i.e. the model will still work fine and do the right thing. The converter isn't always able to figure out the correct output shape (not sure why).

Found input variables with inconsistent numbers of samples: [2, 8382]

my method has 2 input data model
network
branches:
49The first branch is composed of an embedding followed by simple Multi-layer
Perceptron (MLP) designed to handle input of the product description.
The second branch is a CNN to operate over the product image data.
These branches are then be concatenated together to form the final.
The problem is when we try to split the data with train_test_split by Cross Validation, It given as this error.
ValueError: Found input variables with inconsistent numbers of samples: [2, 8382]
MLP and CNN
def create_mlp(dim, regress=False):
# define our MLP network
model = Sequential()
model.add(Dense(8, input_dim=dim, activation="relu"))
model.add(Dense(4, activation="relu"))
# check to see if the regression node should be added
if regress:
model.add(Dense(1, activation="linear"))
# return our model
return model
def create_cnn(width, height, depth, filters=(64, 32, 16), regress=False):
# initialize the input shape and channel dimension, assuming
# TensorFlow/channels-last ordering
inputShape = (height, width, depth)
chanDim = -1
# define the model input
inputs = Input(shape=inputShape)
# loop over the number of filters
for (i, f) in enumerate(filters):
# if this is the first CONV layer then set the input
# appropriately
if i == 0:
x = inputs
# CONV => RELU => BN => POOL
x = Conv2D(f, (3, 3), padding="same")(x)
x = Activation("relu")(x)
x = BatchNormalization(axis=chanDim)(x)
x = MaxPooling2D(pool_size=(2, 2))(x)
# flatten the volume, then FC => RELU => BN => DROPOUT
x = Flatten()(x)
x = Dense(16)(x)
x = Activation("relu")(x)
x = BatchNormalization(axis=chanDim)(x)
x = Dropout(0.5)(x)
# apply another FC layer, this one to match the number of nodes
# coming out of the MLP
x = Dense(4)(x)
x = Activation("relu")(x)
# check to see if the regression node should be added
if regress:
x = Dense(1, activation="linear")(x)
# construct the CNN
model = Model(inputs, x)
# return the CNN
return model
mlp = create_mlp(trainEmbedX.shape[1], regress=False)
cnn = create_cnn(64, 64, 3, regress=False)
combinedInput = concatenate([mlp.output, cnn.output])
x = Dense(4, activation="relu")(combinedInput)
x = Dense(1, activation="sigmoid")(x)
model = Model(inputs=[mlp.input, cnn.input], outputs=x)
model.compile(loss="binary_crossentropy", metrics=['accuracy'], optimizer="adam") # binary_crossentropy
The error occurs here
n_folds=3
epochs=3
batch_size=128
#save the model history in a list after fitting so that we can plot later
model_history = []
for i in range(n_folds):
print("Training on Fold: ",i+1)
t_x, val_x, t_y, val_y = train_test_split([trainEmbedX,trainImagesX], trainY, test_size = 0.2, random_state = np.random.randint(1,1000, 1)[0])
model_history.append(fit_and_evaluate(t_x, val_x, t_y, val_y, epochs, batch_size))
print("======="*12, end="\n\n\n")
Training on Fold: 1
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-84-651638774259> in <module>
7 for i in range(n_folds):
8 print("Training on Fold: ",i+1)
----> 9 t_x, val_x, t_y, val_y = train_test_split([trainEmbedX,trainImagesX], trainY, test_size = 0.2, random_state = np.random.randint(1,1000, 1)[0])
10 model_history.append(fit_and_evaluate(t_x, val_x, t_y, val_y, epochs, batch_size))
11 print("======="*12, end="\n\n\n")
~/anaconda3/envs/baron/lib/python3.6/site-packages/sklearn/model_selection/_split.py in train_test_split(*arrays, **options)
2182 test_size = 0.25
2183
-> 2184 arrays = indexable(*arrays)
2185
2186 if shuffle is False:
~/anaconda3/envs/baron/lib/python3.6/site-packages/sklearn/utils/validation.py in indexable(*iterables)
258 else:
259 result.append(np.array(X))
--> 260 check_consistent_length(*result)
261 return result
262
~/anaconda3/envs/baron/lib/python3.6/site-packages/sklearn/utils/validation.py in check_consistent_length(*arrays)
233 if len(uniques) > 1:
234 raise ValueError("Found input variables with inconsistent numbers of"
--> 235 " samples: %r" % [int(l) for l in lengths])
236
237
ValueError: Found input variables with inconsistent numbers of samples: [2, 8382]
This error happens with mismatching dimensions of X and Y in train_test_split.
By looking at your snippet, you try to concatenate two arrays by [trainEmbedX,trainImagesX] which will add a dimension if the original arrays trainEmbedX and trainImagesX are not 1D, hence you have the shape [2, 8382] in the error.
So instead of [trainEmbedX,trainImagesX], I suggest to use np.concatenate to merge these two arrays by np.concatenate((trainEmbedX,trainImagesX),axis=1).

Proper models import from pytorch in keras

How to import this model from pytorch to keras? I write model from post bottom but Keras and pytorch models give different results.
class net_pytorch(torch.nn.Module):
def __init__(self,Nin=6,Nout=1,Nlinear=112*60):
super(vel_regressor, self).__init__()
self.model1 = torch.nn.Sequential(
torch.nn.Conv1d(Nin,60,kernel_size=3,stride=1,groups=Nin),
torch.nn.ReLU(),
torch.nn.Conv1d(60,120,kernel_size=3,stride=1,groups=Nin),
torch.nn.ReLU(),
torch.nn.Conv1d(120,240,kernel_size=3,stride=1),
torch.nn.ReLU(),
torch.nn.MaxPool1d(10, stride=6),
)
self.model2=model2=torch.nn.Sequential(
torch.nn.Linear(Nlinear, 10*40),
torch.nn.ReLU(),
torch.nn.Linear(10*40, 100),
torch.nn.ReLU(),
torch.nn.Linear(100, Nout)
)
def forward(self, x):
x = self.model1(x)
x = x.view(x.size(0), -1)
x = self.model2(x)
return x
How I write it in keras:
def net_keras():
model2 = Sequential()
model2.add(layers.SeparableConv1D(60, 3, strides=1, activation='relu', depth_multiplier = 6 , name = 'model1.0', input_shape=(200, 6)))
model2.add(layers.SeparableConv1D(120, 3, strides=1, activation='relu', depth_multiplier = 6, name = 'model1.2'))
model2.add(layers.SeparableConv1D(240, 3, strides=1, activation='relu', name = 'model1.4'))
model2.add(layers.GlobalAveragePooling1D())
model2.add(layers.Dense(6720, activation='relu', name = 'model2.0'))
model2.add(layers.Dense(400, activation='relu', name = 'model2.2'))
model2.add(layers.Dense(100, activation='relu', name = 'model2.4'))
model2.add(layers.Dense(3))
model2.compile(optimizer=Adam(), loss='mae')
return model2
I try to use nn-transfer to convert but have error:
Layer names in PyTorch state_dict ['model1.0', 'model1.2', 'model1.4', 'model2.0', 'model2.2', 'model2.4']
Layer names in Keras HDF5 ['dense_1', 'global_average_pooling1d_1', 'model1.0', 'model1.2', 'model1.4', 'model2.0', 'model2.2', 'model2.4']
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-49-a3986379ed2b> in <module>()
----> 1 transfer.pytorch_to_keras(pytorch_network, model2)
/content/nn-transfer/nn_transfer/transfer.py in pytorch_to_keras(pytorch_model, keras_model, flip_filters, flip_channels, verbose)
122 for layer in pytorch_layer_names:
123
--> 124 params = util.dig_to_params(model_weights[layer])
125
126 weight_key = layer + '.weight'
/content/nn-transfer/nn_transfer/util.py in dig_to_params(keras_h5_layer)
23 # ['dense_2']['dense_3']['conv2d_7']['dense_4']['conv1']
24 while not _contains_weights(keras_h5_layer):
---> 25 keras_h5_layer = keras_h5_layer[list(keras_h5_layer.keys())[0]]
26
27 return keras_h5_layer
AttributeError: 'Dataset' object has no attribute 'keys'
Also I try to use pytorch2keras but it dont work with groups != 1.
MMdnn also dont work with this model (error in image).
MMdnn error
Here is the error place :
model.add(layers.Conv1D(60, 3, strides=1, activation='relu', input_shape=(None, 200), name='model1.0'))
You should use input_shape=(None,6) unless your input is always BatchSizex200x6.
In addition, there are tools to convert models between different architectures, e.g. https://github.com/Microsoft/MMdnn.

keras LSTM model input and output dimensions mismatch

model = Sequential()
model.add(Embedding(630, 210))
model.add(LSTM(1024, dropout = 0.2, return_sequences = True))
model.add(LSTM(1024, dropout = 0.2, return_sequences = True))
model.add(Dense(210, activation = 'softmax'))
model.compile(loss = 'categorical_crossentropy', optimizer = 'adam', metrics = ['accuracy'])
filepath = 'ner_2-{epoch:02d}-{loss:.5f}.hdf5'
checkpoint = ModelCheckpoint(filepath, monitor = 'loss', verbose = 1, save_best_only = True, mode = 'min')
callback_list = [checkpoint]
model.fit(X, y , epochs = 20, batch_size = 1024, callbacks = callback_list)
X: the input vector is of the shape (204564, 630, 1)
y: the target vector is of the shape (204564, 210, 1)
i.e. for every 630 inputs 210 outputs have to be predicted but the code throws the following error on compilation
ValueError Traceback (most recent call last)
<ipython-input-57-05a6affb6217> in <module>()
50 callback_list = [checkpoint]
51
---> 52 model.fit(X, y , epochs = 20, batch_size = 1024, callbacks = callback_list)
53 print('successful')
ValueError: Error when checking model input: expected embedding_8_input to have 2 dimensions, but got array with shape (204564, 630, 1)
Please someone explain why this error is occurring and how to solve this
The message says:
Your first layer expects an input with 2 dimensions: (BatchSize, SomeOtherDimension). But your input has 3 dimensions (BatchSize=204564,SomeOtherDimension=630, 1).
Well... remove the 1 from your input, or reshape it inside the model:
Solution 1 - Removing it from the input:
X = X.reshape((204564,630))
Solution 2 - Adding a reshape layer:
model = Sequential()
model.add(Reshape((630,),input_shape=(630,1)))
model.add(Embedding.....)

How do I create a variable-length input LSTM in Keras?

I am trying to do some vanilla pattern recognition with an LSTM using Keras to predict the next element in a sequence.
My data look like this:
where the label of the training sequence is the last element in the list: X_train['Sequence'][n][-1].
Because my Sequence column can have a variable number of elements in the sequence, I believe an RNN to be the best model to use. Below is my attempt to build an LSTM in Keras:
# Build the model
# A few arbitrary constants...
max_features = 20000
out_size = 128
# The max length should be the length of the longest sequence (minus one to account for the label)
max_length = X_train['Sequence'].apply(len).max() - 1
# Normal LSTM model construction with sigmoid activation
model = Sequential()
model.add(Embedding(max_features, out_size, input_length=max_length, dropout=0.2))
model.add(LSTM(128, dropout_W=0.2, dropout_U=0.2))
model.add(Dense(1))
model.add(Activation('sigmoid'))
# try using different optimizers and different optimizer configs
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
And here's how I attempt to train my model:
# Train the model
for seq in X_train['Sequence']:
print("Length of training is {0}".format(len(seq[:-1])))
print("Training set is {0}".format(seq[:-1]))
model.fit(np.array([seq[:-1]]), [seq[-1]])
My output is this:
Length of training is 13
Training set is [1, 3, 13, 87, 1053, 28576, 2141733, 508147108, 402135275365, 1073376057490373, 9700385489355970183, 298434346895322960005291, 31479360095907908092817694945]
However, I get the following error:
Exception: Error when checking model input: expected embedding_input_1 to have shape (None, 347) but got array with shape (1, 13)
I believe my training step is correctly setup, so my model construction must be wrong. Note that 347 is max_length.
How can I correctly build a variable-length input LSTM in Keras? I'd prefer not to pad the data. Not sure if it's relevant, but I'm using the Theano backend.
I am not clear about the embedding procedure. But still here is a way to implement a variable-length input LSTM. Just do not specify the timespan dimension when building LSTM.
import keras.backend as K
from keras.layers import LSTM, Input
I = Input(shape=(None, 200)) # unknown timespan, fixed feature size
lstm = LSTM(20)
f = K.function(inputs=[I], outputs=[lstm(I)])
import numpy as np
data1 = np.random.random(size=(1, 100, 200)) # batch_size = 1, timespan = 100
print f([data1])[0].shape
# (1, 20)
data2 = np.random.random(size=(1, 314, 200)) # batch_size = 1, timespan = 314
print f([data2])[0].shape
# (1, 20)
The trick to training and classifying sequences is training with masking and classifying using a stateful network. Here's an example that I made that classifies whether a sequence of variable length starts with zero or not.
import numpy as np
np.random.seed(1)
import tensorflow as tf
tf.set_random_seed(1)
from keras import models
from keras.layers import Dense, Masking, LSTM
import matplotlib.pyplot as plt
def stateful_model():
hidden_units = 256
model = models.Sequential()
model.add(LSTM(hidden_units, batch_input_shape=(1, 1, 1), return_sequences=False, stateful=True))
model.add(Dense(1, activation='relu', name='output'))
model.compile(loss='binary_crossentropy', optimizer='rmsprop')
return model
def train_rnn(x_train, y_train, max_len, mask):
epochs = 10
batch_size = 200
vec_dims = 1
hidden_units = 256
in_shape = (max_len, vec_dims)
model = models.Sequential()
model.add(Masking(mask, name="in_layer", input_shape=in_shape,))
model.add(LSTM(hidden_units, return_sequences=False))
model.add(Dense(1, activation='relu', name='output'))
model.compile(loss='binary_crossentropy', optimizer='rmsprop')
model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs,
validation_split=0.05)
return model
def gen_train_sig_cls_pair(t_stops, num_examples, mask):
x = []
y = []
max_t = int(np.max(t_stops))
for t_stop in t_stops:
one_indices = np.random.choice(a=num_examples, size=num_examples // 2, replace=False)
sig = np.zeros((num_examples, max_t), dtype=np.int8)
sig[one_indices, 0] = 1
sig[:, t_stop:] = mask
x.append(sig)
cls = np.zeros(num_examples, dtype=np.bool)
cls[one_indices] = 1
y.append(cls)
return np.concatenate(x, axis=0), np.concatenate(y, axis=0)
def gen_test_sig_cls_pair(t_stops, num_examples):
x = []
y = []
for t_stop in t_stops:
one_indices = np.random.choice(a=num_examples, size=num_examples // 2, replace=False)
sig = np.zeros((num_examples, t_stop), dtype=np.bool)
sig[one_indices, 0] = 1
x.extend(list(sig))
cls = np.zeros((num_examples, t_stop), dtype=np.bool)
cls[one_indices] = 1
y.extend(list(cls))
return x, y
if __name__ == '__main__':
noise_mag = 0.01
mask_val = -10
signal_lengths = (10, 15, 20)
x_in, y_in = gen_train_sig_cls_pair(signal_lengths, 10, mask_val)
mod = train_rnn(x_in[:, :, None], y_in, int(np.max(signal_lengths)), mask_val)
testing_dat, expected = gen_test_sig_cls_pair(signal_lengths, 3)
state_mod = stateful_model()
state_mod.set_weights(mod.get_weights())
res = []
for s_i in range(len(testing_dat)):
seq_in = list(testing_dat[s_i])
seq_len = len(seq_in)
for t_i in range(seq_len):
res.extend(state_mod.predict(np.array([[[seq_in[t_i]]]])))
state_mod.reset_states()
fig, axes = plt.subplots(2)
axes[0].plot(np.concatenate(testing_dat), label="input")
axes[1].plot(res, "ro", label="result", alpha=0.2)
axes[1].plot(np.concatenate(expected, axis=0), "bo", label="expected", alpha=0.2)
axes[1].legend(bbox_to_anchor=(1.1, 1))
plt.show()
Not sure how applicable recurrent networks are for your sequences, ie how strongly dependent each element is on its preceding sequence as opposed to other factors. That being said (which doesn't help you one bit of course), if you don't want to pad your input with some bad value, a stateful model that processes a single timestep at once is the only alternative for variable length sequences IMHO. If you don't mind taking an alternative approach to encoding:
import numpy as np
import keras.models as kem
import keras.layers as kel
import keras.callbacks as kec
import sklearn.preprocessing as skprep
X_train, max_features = {'Sequence': [[1, 2, 4, 5, 8, 10, 16], [1, 2, 1, 5, 5, 1, 11, 16, 7]]}, 16
num_mem_units = 64
size_batch = 1
num_timesteps = 1
num_features = 1
num_targets = 1
num_epochs = 1500
model = kem.Sequential()
model.add(kel.LSTM(num_mem_units, stateful=True, batch_input_shape=(size_batch, num_timesteps, num_features),
return_sequences=True))
model.add(kel.Dense(num_targets, activation='sigmoid'))
model.summary()
model.compile(loss='binary_crossentropy', optimizer='adam')
range_act = (0, 1) # sigmoid
range_features = np.array([0, max_features]).reshape(-1, 1)
normalizer = skprep.MinMaxScaler(feature_range=range_act)
normalizer.fit(range_features)
reset_state = kec.LambdaCallback(on_epoch_end=lambda *_ : model.reset_states())
# training
for seq in X_train['Sequence']:
X = seq[:-1]
y = seq[1:] # predict next element
X_norm = normalizer.transform(np.array(X).reshape(-1, 1)).reshape(-1, num_timesteps, num_features)
y_norm = normalizer.transform(np.array(y).reshape(-1, 1)).reshape(-1, num_timesteps, num_targets)
model.fit(X_norm, y_norm, epochs=num_epochs, batch_size=size_batch, shuffle=False,
callbacks=[reset_state])
# prediction
for seq in X_train['Sequence']:
model.reset_states()
for istep in range(len(seq)-1): # input up to not incl last
val = seq[istep]
X = np.array([val]).reshape(-1, 1)
X_norm = normalizer.transform(X).reshape(-1, num_timesteps, num_features)
y_norm = model.predict(X_norm)
yhat = int(normalizer.inverse_transform(y_norm[0])[0, 0])
y = seq[-1] # last
put = '{0} predicts {1:d}, expecting {2:d}'.format(', '.join(str(val) for val in seq[:-1]), yhat, y)
print(put)
which produces sth like:
1, 2, 4, 5, 8, 10 predicts 11, expecting 16
1, 2, 1, 5, 5, 1, 11, 16 predicts 7, expecting 7
with ridiculous loss, however.
It turns out that you can do this using ragged inputs.
Firstly, you need to convert your input data to classes using the to_categorical function
from tensorflow.keras.utils import to_categorical
from tensorflow.ragged import constant
X_train = constant(list(map(lambda x: to_categorical(x, num_classes=max_features),X_train)))
Then, you need to edit your model slightly:
model = Sequential()
model.add(Input((None,max_features),ragged=True)) # use this instead of an Embedding
model.add(Embedding(max_features, out_size, input_length=max_length, dropout=0.2))
model.add(LSTM(128, dropout_W=0.2, dropout_U=0.2))
model.add(Dense(1))
model.add(Activation('sigmoid'))
And then work from there!

Resources