Error in creating h5 file (hdf file) - keras

For below code i have save models weights in mnist_weights1234.h5. and want to create same file like mnist_weights1234.h5 with same layer configuration
import keras
from __future__ import print_function
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras import backend as K
import numpy as np
from sklearn.model_selection import train_test_split
batch_size = 128
num_classes = 3
epochs = 1
# input image dimensions
img_rows, img_cols = 28, 28
#Just for reducing data set
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x1_train=x_train[y_train==0]; y1_train=y_train[y_train==0]
x1_test=x_test[y_test==0];y1_test=y_test[y_test==0]
x2_train=x_train[y_train==1];y2_train=y_train[y_train==1]
x2_test=x_test[y_test==1];y2_test=y_test[y_test==1]
x3_train=x_train[y_train==2];y3_train=y_train[y_train==2]
x3_test=x_test[y_test==2];y3_test=y_test[y_test==2]
X=np.concatenate((x1_train,x2_train,x3_train,x1_test,x2_test,x3_test),axis=0)
Y=np.concatenate((y1_train,y2_train,y3_train,y1_test,y2_test,y3_test),axis=0)
# the data, shuffled and split between train and test sets
x_train, x_test, y_train, y_test = train_test_split(X,Y)
if K.image_data_format() == 'channels_first':
x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
input_shape = (1, img_rows, img_cols)
else:
x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
input_shape = (img_rows, img_cols, 1)
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)
model = Sequential()
model.add(Conv2D(1, kernel_size=(2, 2),
activation='relu',
input_shape=input_shape))
model.add(MaxPooling2D(pool_size=(16,16)))
model.add(Flatten())
model.add(Dense(num_classes, activation='softmax'))
model.compile(loss=keras.losses.categorical_crossentropy,
optimizer=keras.optimizers.Adadelta(),
metrics=['accuracy'])
model.save_weights('mnist_weights1234.h5')
Now i want to create file like mnist_weights.h5. So i use below code and getting error.
hf = h5py.File('mnist_weights12356.h5', 'w')
hf.create_dataset('conv2d_2/conv2d_2/bias', data=weights[0])
hf.create_dataset('conv2d_2/conv2d_2/kernel', data=weights[1])
hf.create_dataset('dense_2/dense_2/bias', data=weights[2])
hf.create_dataset('dense_2/dense_2/kernel', data=weights[3])
hf.create_dataset('flatten_2', data=None)
hf.create_dataset('max_pooling_2d_2', data=None)
hf.close()
But getting following error:TypeError: One of data, shape or dtype must be specified.
How to solve problem

If you want to use weights that are in numpy arrays, simply set the weights in the layers:
model.get_layer('conv2d_2').set_weights([weights[1],weights[0]])
model.get_layer('dense_2').set_weights([weights[3],weights[2]])
If your arrays are stored in files:
array = numpy.load('arrayfile.npy')
You can save the entire model weights as numpy arrays:
numpy.save('weights.npy', model.get_weights())
model.set_weights(numpy.load('weights.npy'))

The error message has your solution. In these lines:
hf.create_dataset('flatten_2', data=None)
hf.create_dataset('max_pooling_2d_2', data=None)
You are giving data equals to None. To create a dataset, the HDF5 library needs a minimum information, and as the error says, you either need to give a dtype (the data type of the dataset' elements), or a non-None data parameter (to infer the shape), or a shape parameter. You are giving none of these, so the error is correct.
Just give enough information in the create_dataset call for a dataset ti be created.

Related

ValueError: Dimensions must be equal (keras)

I'm trying to train an autoencoder but have problems in reshaping my X_train to fit it to my model model().
from tensorflow import keras
from keras.layers import *
from keras.models import Model
from keras.models import Sequential
from keras.optimizers import Adam
from keras.optimizers import RMSprop
from keras.utils import plot_model
X_train = np.array(X_train, dtype=np.float)
X_test =np.array(X_train, dtype=np.float)
X_train = X_train.reshape(len(X_train), 100,1)
X_test = X_test.reshape(len(X_test), 100,1)
#inputs = Input(shape=(230, 1,100))
epoch = 100
batch = 128
def model():
m = Sequential()
# ##m.add(Reshape((,)))
m.add(Flatten())
m.add(Dense(512, activation='relu'))
m.add(Dense(128, activation = 'relu'))
m.add(Dense(2, activation = 'linear'))
m.add(Dense(128, activation = 'relu'))
m.add(Dense(512, activation = 'relu'))
m.add(Dense(784, activation = 'sigmoid'))
m.compile(loss='mean_squared_error', optimizer = 'rmsprop', metrics = ['accuracy'])
# Fit data to model m
m.fit(X_train, X_train, batch_size = batch, epochs = epoch)
m.summary()
#score = m.evaluate(X_test, Y_test, verbose = 0)
#print('Test loss:' score[0])
#print('Test accuracy:', score[1])
#m.summary()
mod = model()
The of dimension of my data is the following:
X_train = (523, 100,1)
X_test = (523, 100,1)
To fix your issue, change the following:
X_train = X_train.reshape((-1, 100))
X_test = X_test.reshape((-1, 100))
Delete the Flatten layer and use 100 neurons for the last layer as stated in the comments.

How to use KerasClassifier validation split and using scitkit learn GridSearchCV

I want to try to test some hyperparameters, thats i want to use the GridSearchCV, because it seems like thats the way to do it.
But i also want to use the validation split. To use Callsbacks like EarlyStopping or/and ReduceLROnPlateau. So my question is:
How do i implement GridSearchCV + validation_split correctly that none of the data in validation split is using for training and the whole training set is used to train my model?
Afaik GridSearchCV split again my remaining train data (which is 1-validation_split) and split it again? I get kinda high accuracy and im thinking that i dont split the data correctly
model = KerasClassifier(build_fn=create_model,verbose=2, validation_split=0.1)
optimizers = ['rmsprop', 'adam']
init = ['glorot_uniform',
#'normal',
'uniform',
'he_normal',
#'lecun_normal',
#'he_uniform'
]
epochs = [3] #5,8,10,30
batches = [64] #32,64
param_grid = dict(optimizer=optimizers, epochs=epochs, batch_size=batches, init=init)
grid = GridSearchCV(estimator=model, param_grid=param_grid)
grid_result = grid.fit(X_train, Y_train)
You can use your self-defined validation data by passing an extra argument to the grid.fit() function that is validation_data=(X_test, Y_test). The documentation, states that grid.fit() function accepts all valid arguments that can be passed to the actual model.fit() function of the default Keras model. Therefore, you can pass the validation data through the grid.fit() function. You may also pass the callback functions there.
I am adding a working code below (applied on MNIST-digit dataset). Notice how I added the validation data on grid.fit() and removed the 'validation_split':
import tensorflow as tf
import numpy as np
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical
(X_train, Y_train), (X_test, Y_test) = mnist.load_data()
Y_train = to_categorical(Y_train, 10)
Y_test = to_categorical(Y_test, 10)
X_train = np.expand_dims(X_train, 3)
X_test = np.expand_dims(X_test, 3)
def create_model(optimizer, init):
model = tf.keras.Sequential([
tf.keras.layers.Convolution2D(32, 3, input_shape=(28, 28, 1),
activation='relu', kernel_initializer=init),
tf.keras.layers.Convolution2D(32, 3, activation='relu',
kernel_initializer=init),
tf.keras.layers.Flatten(),
tf.keras.layers.Dense(12, activation='relu',
kernel_initializer=init),
tf.keras.layers.Dense(10, activation='softmax',
kernel_initializer=init),
])
model.compile(loss='categorical_crossentropy',
optimizer=optimizer, metrics=['accuracy'])
return model
model = KerasClassifier(build_fn=create_model, verbose=2,)
optimizers = ['rmsprop', 'adam']
init = ['glorot_uniform',
#'normal',
'uniform',
'he_normal',
#'lecun_normal',
#'he_uniform'
]
epochs = [4,]
batches = [32, 64]
param_grid = dict(optimizer=optimizers, nb_epoch=epochs,
batch_size=batches, init=init)
grid = GridSearchCV(estimator=model, param_grid=param_grid)
grid_result = grid.fit(X_train, Y_train, validation_data=(X_test, Y_test))
Hope this helps. Thanks.

ValueError: Input arrays should have the same number of samples as target arrays LSTM Keras

Here is the part for data preparing - I just want the data to be in the correct shape
x_train , x_test, y_train, y_test = train_test_split(input_data, y , test_size = 0.2 , random_state = 33)
print(x_train.shape)
print(y_train.shape)
(200, 3)
(200, 1)
#Converting them into numpy arrays
input_x_train = x_train.as_matrix()
input_y_train = y_train.as_matrix()
print(input_x_train.shape)
print(input_y_train.shape)
(200, 3)
(200, 1)
input_x_test = x_test.as_matrix()
input_y_test = y_test.as_matrix()
print(input_x_test.shape)
print(input_y_test.shape)
(51, 3)
(51, 1)
#Reshaping into LSTM input format
input_x = input_x_train.reshape((1, input_x_train.shape[0], input_x_train.shape[1]))
print(input_x.shape)
(1, 200, 3)
Then I built my model like this:
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras.layers.recurrent import LSTM
from keras.layers.normalization import BatchNormalization
model = Sequential()
model.add(LSTM(32, input_shape=(200, 3)))
model.add(Dense(1))
model.compile(loss='mse', optimizer='adam')
model.fit(input_x, input_y_train, epochs=1, batch_size=16)
But I am getting this error
ValueError: Input arrays should have the same number of samples as
target arrays. Found 1 input samples and 200 target samples.
The input_shape parameter should not include your batch size. Given that each sample of yours dataset has three features, you should set input_shape=(3,).
In addition, you should not reshape your batch to (1, batch_size, 3). I'm not sure why you're doing that, but as far as I can tell that will break things. Remove that line entirely.

Using tf.data.Dataset as training input to Keras model NOT working

I have a simple code, which DOES work, for training a Keras model in Tensorflow using numpy arrays as features and labels. If I then wrap these numpy arrays using tf.data.Dataset.from_tensor_slices in order to train the same Keras model using a tensorflow dataset, I get an error. I haven't been able to figure out why (it may be a tensorflow or keras bug, but I may also be missing something). I'm on python 3, tensorflow is 1.10.0, numpy is 1.14.5, no GPU involved.
OBS1: The possibility of using tf.data.Dataset as a Keras input is showed in https://www.tensorflow.org/guide/keras, under "Input tf.data datasets".
OBS2: In the code below, the code under "#Train with numpy arrays" is being executed, using numpy arrays. If this code is commented and the code under "#Train with tf.data datasets" is used instead, the error will be reproduced.
OBS3: In line 13, which is commented and starts with "###WORKAROUND 1###", if the comment is removed and the line is used for tf.data.Dataset inputs, the error changes, even though I can't completely understand why.
The complete code is:
import tensorflow as tf
import numpy as np
np.random.seed(1)
tf.set_random_seed(1)
print(tf.__version__)
print(np.__version__)
#Import mnist dataset as numpy arrays
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()#Import
x_train, x_test = x_train / 255.0, x_test / 255.0 #normalizing
###WORKAROUND 1###y_train, y_test = (y_train.astype(dtype='float32'), y_test.astype(dtype='float32'))
x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1]*x_train.shape[2])) #reshaping 28 x 28 images to 1D vectors, similar to Flatten layer in Keras
batch_size = 32
#Create a tf.data.Dataset object equivalent to this data
tfdata_dataset_train = tf.data.Dataset.from_tensor_slices((x_train, y_train))
tfdata_dataset_train = tfdata_dataset_train.batch(batch_size).repeat()
#Creates model
keras_model = tf.keras.models.Sequential([
tf.keras.layers.Dense(512, activation=tf.nn.relu),
tf.keras.layers.Dropout(0.2, seed=1),
tf.keras.layers.Dense(10, activation=tf.nn.softmax)
])
#Compile the model
keras_model.compile(optimizer='adam',
loss=tf.keras.losses.sparse_categorical_crossentropy,
metrics=['accuracy'])
#Train with numpy arrays
keras_training_history = keras_model.fit(x_train,
y_train,
initial_epoch=0,
epochs=1,
batch_size=batch_size
)
#Train with tf.data datasets
#keras_training_history = keras_model.fit(tfdata_dataset_train,
# initial_epoch=0,
# epochs=1,
# steps_per_epoch=60000//batch_size
# )
print(keras_training_history.history)
The error observed when using tf.data.Dataset as input is:
(...)
ValueError: Tensor conversion requested dtype uint8 for Tensor with dtype float32: 'Tensor("metrics/acc/Cast:0", shape=(?,), dtype=float32)'
During handling of the above exception, another exception occurred:
(...)
TypeError: Input 'y' of 'Equal' Op has type float32 that does not match type uint8 of argument 'x'.
The error when removing the comment from line 13, as commented above in OBS3, is:
(...)
tensorflow.python.framework.errors_impl.InvalidArgumentError: In[0] is not a matrix
[[Node: dense/MatMul = MatMul[T=DT_FLOAT, _class=["loc:#training/Adam/gradients/dense/MatMul_grad/MatMul_1"], transpose_a=false, transpose_b=false, _device="/job:localhost/replica:0/task:0/device:CPU:0"](_arg_sequential_input_0_0, dense/MatMul/ReadVariableOp)]]
Any help would be appreciated, including comments that you were able to reproduce the errors, so I can report the bug if it is the case.
I just upgraded to Tensorflow 1.10 to execute this code. I think that is the answer which is also discussed in the other Stackoverflow thread
This code executes but only if I remove the normalization as that line seems to use too much CPU memory. I see messages indicating that. I also reduced the cores.
import tensorflow as tf
import numpy as np
from tensorflow.keras.layers import Conv2D, MaxPool2D, Flatten, Dense, Dropout, Input
np.random.seed(1)
tf.set_random_seed(1)
batch_size = 128
NUM_CLASSES = 10
print(tf.__version__)
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
#x_train, x_test = x_train / 255.0, x_test / 255.0 #normalizing
def tfdata_generator(images, labels, is_training, batch_size=128):
'''Construct a data generator using tf.Dataset'''
def preprocess_fn(image, label):
'''A transformation function to preprocess raw data
into trainable input. '''
x = tf.reshape(tf.cast(image, tf.float32), (28, 28, 1))
y = tf.one_hot(tf.cast(label, tf.uint8), NUM_CLASSES)
return x, y
dataset = tf.data.Dataset.from_tensor_slices((images, labels))
if is_training:
dataset = dataset.shuffle(1000) # depends on sample size
# Transform and batch data at the same time
dataset = dataset.apply(tf.contrib.data.map_and_batch(
preprocess_fn, batch_size,
num_parallel_batches=2, # cpu cores
drop_remainder=True if is_training else False))
dataset = dataset.repeat()
dataset = dataset.prefetch(tf.contrib.data.AUTOTUNE)
return dataset
training_set = tfdata_generator(x_train, y_train,is_training=True, batch_size=batch_size)
testing_set = tfdata_generator(x_test, y_test, is_training=False, batch_size=batch_size)
inputs = Input(shape=(28, 28, 1))
x = Conv2D(32, (3, 3), activation='relu', padding='valid')(inputs)
x = MaxPool2D(pool_size=(2, 2))(x)
x = Conv2D(64, (3, 3), activation='relu')(x)
x = MaxPool2D(pool_size=(2, 2))(x)
x = Flatten()(x)
x = Dense(512, activation='relu')(x)
x = Dropout(0.5)(x)
outputs = Dense(NUM_CLASSES, activation='softmax')(x)
keras_model = tf.keras.Model(inputs, outputs)
#Compile the model
keras_model.compile('adam', 'categorical_crossentropy', metrics=['acc'])
#Train with tf.data datasets
keras_training_history = keras_model.fit(
training_set.make_one_shot_iterator(),
steps_per_epoch=len(x_train) // batch_size,
epochs=5,
validation_data=testing_set.make_one_shot_iterator(),
validation_steps=len(x_test) // batch_size,
verbose=1)
print(keras_training_history.history)
Installing the tf-nightly build, together with changing dtypes of some tensors (the error changes after installing tf-nightly), solved the problem, so it is an issue which (hopefully) will be solved in 1.11.
Related material: https://github.com/tensorflow/tensorflow/issues/21894
I am wondering how Keras is able to do 5 epochs when the
make_one_shot_iterator() which only supports iterating once through a
dataset?
could be given smth like iterations = len(y_train) * epochs - here shown for tf.v1
the code from Mohan Radhakrishnan still works in tf.v2 with little corrections in objects' belongings to new classes (in tf.v2) fixings - to make the code up-to-date... No more make_one_shot_iterator() needed
# >> author: Mohan Radhakrishnan
import tensorflow as tf
import tensorflow.keras
import numpy as np
from tensorflow.keras.layers import Conv2D, MaxPool2D, Flatten, Dense, Dropout, Input
np.random.seed(1)
tf.random.set_seed(1)
batch_size = 128
NUM_CLASSES = 10
print(tf.__version__)
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
#x_train, x_test = x_train / 255.0, x_test / 255.0 #normalizing
def tfdata_generator(images, labels, is_training, batch_size=128):
'''Construct a data generator using tf.Dataset'''
def preprocess_fn(image, label):
'''A transformation function to preprocess raw data
into trainable input. '''
x = tf.reshape(tf.cast(image, tf.float32), (28, 28, 1))
y = tf.one_hot(tf.cast(label, tf.uint8), NUM_CLASSES)
return x, y
dataset = tf.data.Dataset.from_tensor_slices((images, labels))
if is_training:
dataset = dataset.shuffle(1000) # depends on sample size
# Transform and batch data at the same time
dataset = dataset.apply( tf.data.experimental.map_and_batch(
preprocess_fn, batch_size,
num_parallel_batches=2, # cpu cores
drop_remainder=True if is_training else False))
dataset = dataset.repeat()
dataset = dataset.prefetch( tf.data.experimental.AUTOTUNE)
return dataset
training_set = tfdata_generator(x_train, y_train,is_training=True, batch_size=batch_size)
testing_set = tfdata_generator(x_test, y_test, is_training=False, batch_size=batch_size)
inputs = Input(shape=(28, 28, 1))
x = Conv2D(32, (3, 3), activation='relu', padding='valid')(inputs)
x = MaxPool2D(pool_size=(2, 2))(x)
x = Conv2D(64, (3, 3), activation='relu')(x)
x = MaxPool2D(pool_size=(2, 2))(x)
x = Flatten()(x)
x = Dense(512, activation='relu')(x)
x = Dropout(0.5)(x)
outputs = Dense(NUM_CLASSES, activation='softmax')(x)
keras_model = tf.keras.Model(inputs, outputs)
#Compile the model
keras_model.compile('adam', 'categorical_crossentropy', metrics=['acc'])
#Train with tf.data datasets
# training_set.make_one_shot_iterator() - 'PrefetchDataset' object has no attribute 'make_one_shot_iterator'
keras_training_history = keras_model.fit(
training_set,
steps_per_epoch=len(x_train) // batch_size,
epochs=5,
validation_data=testing_set,
validation_steps=len(x_test) // batch_size,
verbose=1)
print(keras_training_history.history)
not loading data locally, just easy DataFlow - that is very convinient - Thanks a lot - hope my corrections are proper

How to solve this problem of Memory error?

So I have this error message that ruins all the fun with my work:
Traceback (most recent call last):
File "C:\Python\Python36\Scripts\Masterarbeit-1308\CNN - Kopie.py", line 97, in <module>
model.fit(np.asarray(X_train), np.asarray(Y_train), batch_size=32, epochs=100, verbose=1, validation_data=(np.asarray(X_test), np.asarray(Y_test)))
File "C:\Users\\****\AppData\Roaming\Python\Python36\site-packages\numpy\core\numeric.py", line 492, in asarray
return array(a, dtype, copy=False, order=order)
MemoryError
Does anyone has a solution for this?
I work on a machine i7 7th generation with 16 GB RAM.
To explain more, That's my code, It take al list of arrays (.npy) converted from sounds spectograms to .npy and saved in Input-CNN:
import os, numpy as np
from keras.models import Sequential
from keras.layers import Convolution2D, MaxPooling2D, Activation, Flatten, Conv2D, Dropout, Dense
from keras.layers.normalization import BatchNormalization
import tensorflow as tf
from sklearn.utils import shuffle
from sklearn.cross_validation import train_test_split
from keras.utils import to_categorical
folder = 'D:\InputCNN - Copie'
folder1 = 'C:\Python\Python36\Scripts\Masterarbeit-1308\Data'
from keras import backend as K
My_Data = os.listdir(folder)
num_data= len(My_Data)
Classnames = os.listdir(folder1)
class_num = len(Classnames)
arr =[np.load(os.path.join(folder, filename), fix_imports=True) for filename in os.listdir(folder)]
labels = np.ones((num_data,))
labels[0:31]= 0
labels[31:80] = 1
labels[80:128] = 2
labels[128:131] = 3
labels[131:143] = 4
labels[143:157] = 5
labels[157:209] = 6
labels[209:] = 7
Y = to_categorical(labels,class_num)
x, y = shuffle(arr, Y, random_state=2)
dataset = tf.data.Dataset.from_tensor_slices(My_Data)
X_train, X_test, Y_train, Y_test = train_test_split(x, Y, test_size=0.2)
##
def build_model(idx,X,Y,nb_classes):
K.set_image_data_format('channels_last')
nb_filters = 64 # number of convolutional filters to use
pool_size = (2, 2) # size of pooling area for max pooling
kernel_size = (3, 3) # convolution kernel size
nb_layers = 4
input_shape = (X[idx].shape[1], X[idx].shape[2], X[idx].shape[3])
model = Sequential()
model.add(Conv2D(nb_filters, kernel_size, padding='valid', input_shape=input_shape))
model.add(BatchNormalization(axis=1))
model.add(Activation('relu'))
for layer in range(nb_layers-1):
model.add(Conv2D(nb_filters, kernel_size, padding='valid', input_shape=input_shape))
model.add(BatchNormalization(axis=1))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=pool_size))
model.add(Dropout(0.5))
model.add(Flatten())
model.add(Dense(128))
model.add(Activation('relu'))
model.add(Dropout(0.6))
model.add(Dense(nb_classes, activation='sigmoid'))
return model
for idx in range(len(X_train)-1):
model = build_model(idx,X_train,Y_train, class_num)
model.summary()
model.compile(loss='categorical_crossentropy',
optimizer='adadelta',
metrics=['accuracy'])
model.fit(np.array(X_train), np.array(Y_train), batch_size=8, epochs=100, verbose=1, validation_data=(np.array(X_test), np.array(Y_test))) #Here I have the problem
score = model.evaluate(np.array(X_test), np.array(Y_test), verbose=0)
print('Test score:', score[0])
print('Test accuracy:', score[1])
The model fit function is the problem in my code, that should train my preconfigured model and returns an history object (A record of the training). I tried np.array and np.asarray and I got the same error message.
If someone think that the model`s summary can be helpful, I'll post it.
I solved this issue. Actually I changed the shape of my data in the list "X_train" (from (218,128,740,1) to (128,740,1)).
I found that, thanks to Keras, it will add automatically another axis with the number of my data injected to the network, and np.asarray works well even with more data.

Resources