Wrong dimensions when trying to use model.predict() from Keras - python-3.x

I think the code will speak for itself: I trained a model that I now want to use to predict on some new input data, but the new input data seems to have the wrong dimensions. Below you can see the code and error messages for both the model training and the (attempted) prediction.
# Training script: tokenize the dataset, train an Embedding + LSTM binary
# classifier, evaluate it and save it to disk.
# The tokenizer is created with num_words=10000, the same number used as the
# first argument of the Embedding layer below.
tokenizer = Tokenizer(num_words=10000)
df = pd.read_csv('/home/paperspace/Sentiment Analysis Dataset.csv', index_col = 0,
error_bad_lines = False)
y = list(df['Sentiment'])
# Fit the tokenizer on the training texts, then encode those same texts.
tokenizer.fit_on_texts(list(df['SentimentText']))
X = tokenizer.texts_to_sequences(list(df['SentimentText']))
# No maxlen is passed, so the padded width is decided by the data itself
# (presumably the longest sequence -- confirm against the pad_sequences docs).
X = pad_sequences(X)
print("Done, fitting on texts.")
# Hold out 15% of the samples for validation/testing.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, shuffle = True)
model = Sequential()
# Create the word embeddings.
embedding_vector_dim = 32
# input_length is tied to X.shape[1], i.e. the padded width of the training data.
model.add(Embedding(10000, embedding_vector_dim, input_length=X.shape[1]))
model.add(Dropout(0.2))
model.add(LSTM(128))
model.add(Dropout(0.2))
# Single sigmoid unit, paired with the binary_crossentropy loss below.
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy',
optimizer='adam',
metrics=['accuracy'])
model.summary()
# The held-out split is used both as validation data here and for evaluate() below.
model.fit(numpy.array(X_train), numpy.array(y_train),
batch_size=128,
epochs=1,
validation_data=(numpy.array(X_test), numpy.array(y_test)))
score, acc = model.evaluate(numpy.array(X_test),numpy.array(y_test),
batch_size=128)
# The trained model is written to './sentiment_seq.h5'.
model.save('./sentiment_seq.h5')
print('Test score:', score)
print('Test accuracy:', acc)
Now for the prediction attempt and its error message:
# Prediction script: preprocess one sentence and run it through the saved model.
text = "this is actually a very bad movie."
# NOTE(review): a brand-new Tokenizer is created and fitted here; the tokenizer
# fitted on the training data is not reused.
tokenizer = Tokenizer()
# list(text) turns the string into a list of single characters, so every
# character is handed to the tokenizer as its own "text".
tokenizer.fit_on_texts(list(text))
X = tokenizer.texts_to_sequences(list(text))
# No maxlen is passed, so nothing ties the padded width to the width the
# trained model was built with.
X = pad_sequences(X)
# X_flat is computed but never used below; predict() receives X.
X_flat = np.array([X.flatten()])
# NOTE(review): the training script saves './sentiment_seq.h5', but a different
# file name is loaded here.
model = load_model('sentiment_test.h5')
model.compile(loss='binary_crossentropy',
optimizer='adam',
metrics=['accuracy'])
print(model.predict(X, batch_size = 1, verbose = 1))
ValueError: Error when checking : expected embedding_1_input to have shape (None, 116) but got array with shape (1, 38)
So, basically, why am I getting this error when the preprocessing is the same for training and predicting, and how can I know what the expected input shape should be before seeing the error message?

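If you're not working with a fixed input length, you should not define an input_length in the embedding layer. With input_length=X.shape[1], the model expects exactly the padded width of the training data (116 here, as the error message shows), so new inputs must be padded to that same width, e.g. pad_sequences(X, maxlen=116). Also note that the prediction code is not the same preprocessing as in training: it fits a brand-new Tokenizer on the new text, so its word indices do not match the ones the model was trained on, and list(text) splits the sentence into single characters. Reuse the tokenizer that was fitted on the training data and pass the sentence as a one-element list, [text].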

Related

How to predict the class probability with an array

I have an array of data and I'm trying to predict the probability that it's 1 or 0.
I have a data set with more than 3000 rows of features, and the output is either 1 or 0.
I'm quite new to neural networks, so I found an example online, but now I'm having difficulty working out how to predict with unknown data.
In my case I want to predict the probability of 1 for the row variable.
Here's the code
# Load the data: feature columns are taken from column index 10 onward,
# the target is the 'output' column.
df = pd.read_csv("data.csv")
X = df.iloc[:,10:]
Y = df['output']
# Standardize the features; X is replaced by its scaled version.
scaler = StandardScaler()
scaler.fit(X)
X = scaler.transform(X)
# Encode the target labels with a LabelEncoder.
encoder = LabelEncoder()
encoder.fit(Y)
encoded_Y = encoder.transform(Y)
# larger model
def create_larger():
    """Build and compile a fully connected binary classifier.

    The network takes 25 input features, passes them through three ReLU
    hidden layers (60, 30 and 15 units) and ends in a single sigmoid unit.
    It is compiled with binary cross-entropy loss, the Adam optimizer and
    the accuracy metric.

    Returns:
        The compiled Sequential model.
    """
    # create model
    model = Sequential()
    model.add(Dense(60, input_shape=(25,), activation='relu'))
    model.add(Dense(30, activation='relu'))
    model.add(Dense(15, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    # Compile model
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model
# Build a pipeline of a StandardScaler followed by the Keras classifier and
# score it with 5-fold stratified cross-validation.
estimators = []
# NOTE(review): X was already passed through a StandardScaler earlier in this
# script, and the pipeline standardizes again.
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasClassifier(model=create_larger, epochs=50, batch_size=5, verbose=2)))
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=5, shuffle=True)
results = cross_val_score(pipeline, X, encoded_Y, cv=kfold)
# Report mean and standard deviation of the fold scores as percentages.
print("Larger: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))
# A single sample with 25 feature values to predict on.
row = [[4, 0.558, 0.493, 0.954, 0.895, 0.683, 8.7, 26, 0.155, 8.3, 21.8, 0.21, 0.723, 0.548, 0.466, 0.979, 0.887, 0.464, 11.8, 25.5, 0.184, 7.5, 18, 0.217, 0.651]]
# NOTE(review): this scaler is created but not used in the lines below.
scaler = StandardScaler()
row = np.array(row)
# NOTE(review): fit() is called with the single row only and no target labels,
# and its return value (not a scaled copy of row) is what gets passed to
# predict(). Presumably this is the source of the reported
# "Expected array-like ..., got None" error -- confirm.
scaled_row = pipeline.fit(row)
print(pipeline.predict(scaled_row))
If I run this code I get an error
ValueError: Expected array-like (array or non-string sequence), got None
So now I'm kind of lost as to what to change.
Thanks.

Keras fit_generator() not working due to shape error

I am running MNIST prediction using Keras with the TensorFlow backend.
I have code that runs in batches, using Keras fit(), as follows:
# Load MNIST and prepare flattened, rescaled inputs plus one-hot targets.
(X_train, y_train), (X_test, y_test) = mnist.load_data()
# Sample counts and image height/width are read from the array shapes.
N1 = X_train.shape[0]
N2 = X_test.shape[0]
h = X_train.shape[1]
w = X_train.shape[2]
num_pixels = h*w
# reshape N1 samples to num_pixels
x_train = X_train.reshape(N1, num_pixels).astype('float32') # shape is now (60000,784)
x_test = X_test.reshape(N2, num_pixels).astype('float32') # shape is now (10000,784)
# Divide pixel values by 255.
x_train = x_train / 255
x_test = x_test / 255
# One-hot encode the labels.
y_train = np_utils.to_categorical(y_train) #(60000,10)
y_test = np_utils.to_categorical(y_test) # (10000,10):
# The number of classes is the width of the one-hot matrix.
num_classes = y_test.shape[1]
def baseline_model():
    """Build and compile a one-hidden-layer softmax classifier.

    Reads the module-level ``num_pixels`` (input width and hidden layer
    size) and ``num_classes`` (output layer size). The model is compiled
    with categorical cross-entropy loss, the Adam optimizer and the
    accuracy metric.

    Returns:
        The compiled Sequential model.
    """
    # create model
    model = Sequential()
    model.add(Dense(num_pixels, input_dim=num_pixels, kernel_initializer='normal', activation='relu'))
    model.add(Dense(num_classes, kernel_initializer='normal', activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model
# Train the baseline model batch by batch from an ImageDataGenerator,
# flattening every batch by hand before it is passed to model.fit().
model = baseline_model()
batch_size = 200
epochs = 20
max_batches = 2 * len(x_train) / batch_size # 2*60000/200
# reshape to be [samples][width][height][ channel] for ImageDataGenerator
x_t = X_train.reshape(N1, w, h, 1).astype('float32')
# The generator rescales pixels by 1/255, so the unscaled X_train is fed to it.
datagen = ImageDataGenerator(rescale= 1./255)
train_gen = datagen.flow(x_t, y_train, batch_size=batch_size)
for e in range(epochs):
    batches = 0
    for x_batch, y_batch in train_gen:
        # x_batch is of size [batch_sz,w,h,ch]: resize to [bth_sz,pixel_sz]: (200,28,28,1)-> (200,784)
        # for model.fit
        x_batch = np.reshape(x_batch, [-1, num_pixels])
        model.fit(x_batch, y_batch,validation_split=0.15,verbose=0)
        batches += 1
        print("Epoch %d/%d, Batch %d/%d" % (e+1, epochs, batches, max_batches))
        # The inner loop is left explicitly once max_batches batches were seen
        # (presumably the generator never ends by itself -- confirm).
        if batches >= max_batches:
            break
scores = model.evaluate(x_test, y_test, verbose=0)
However, when I try to implement similar code using fit_generator(), I get an error.
The code is as below:
# Load MNIST, carve a validation set out of the training data and one-hot
# encode all three label arrays.
(X_train, y_train), (X_test, y_test) = mnist.load_data()
# separate data into train and validation
from sklearn.model_selection import train_test_split
# Split the data
X_train, X_valid, y_train, y_valid = train_test_split(X_train, y_train, test_size=0.15, shuffle= True)
# number of training samples
N1 = X_train.shape[0] # training size
N2 = X_test.shape[0] # test size
N3 = X_valid.shape[0] # valid size
# Image height and width are read from the array shape.
h = X_train.shape[1]
w = X_train.shape[2]
num_pixels = h*w
y_train = np_utils.to_categorical(y_train)
y_valid = np_utils.to_categorical(y_valid)
y_test = np_utils.to_categorical(y_test)
# The number of classes is the width of the one-hot matrix.
num_classes = y_test.shape[1]
def baseline_model():
    """Build and compile a one-hidden-layer softmax classifier.

    Reads the module-level ``num_pixels`` (input width and hidden layer
    size) and ``num_classes`` (output layer size). The first Dense layer is
    declared with ``input_dim=num_pixels``, i.e. it takes flat vectors.

    Returns:
        The compiled Sequential model.
    """
    # create model
    model = Sequential()
    model.add(Dense(num_pixels, input_dim=num_pixels, kernel_initializer='normal', activation='relu'))
    model.add(Dense(num_classes, kernel_initializer='normal', activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model
# Train the baseline model with fit_generator(), using generators for both
# the training and the validation data.
model = baseline_model()
batch_size = 200
epochs = 20
# Whole batches per epoch; int() truncates any remainder.
steps_per_epoch_tr = int(N1/ batch_size) # 51000/200
steps_per_epoch_val = int(N3/batch_size)
# reshape to be [samples][width][height][ channel] for ImageData Gnerator->datagen.flow
x_t = X_train.reshape(N1, w, h, 1).astype('float32')
x_v = X_valid.reshape(N3, w, h, 1).astype('float32')
# define data preparation
datagen = ImageDataGenerator(rescale=1./255) # scales x_t/x_v
train_gen = datagen.flow(x_t, y_train, batch_size=batch_size)
valid_gen = datagen.flow(x_v,y_valid, batch_size=batch_size)
# NOTE(review): the generators are built from the 4-D arrays x_t / x_v, while
# baseline_model() declares its first Dense layer with input_dim=num_pixels;
# nothing here flattens the batches before they reach the model.
model.fit_generator(train_gen,steps_per_epoch = steps_per_epoch_tr,validation_data = valid_gen,
validation_steps = steps_per_epoch_val,epochs=epochs)
This gives an error:
It appears to be due to a mismatch in the expected image dimensions, but I am not sure where or how to fix this. Any help is greatly appreciated.
Thanks
sedy
In the model.fit() case, this line flattened the input before feeding it for training.
x_batch = np.reshape(x_batch, [-1, num_pixels])
But in the generator case, there is nothing to flatten the input before feeding it to the Dense layer. The Dense layer was declared with input_dim=num_pixels, so it expects flat vectors of 784 values, not the (28, 28, 1) images that the generator yields. Adding a Flatten() layer to the model should do the trick, as shown below.
def baseline_model():
    """Build and compile the baseline classifier for (28, 28, 1) image input.

    A Flatten layer turns each (28, 28, 1) image into a flat vector so that
    batches coming straight from the image generator can be fed to the Dense
    layers. Reads the module-level ``num_pixels`` (hidden layer size) and
    ``num_classes`` (output layer size).

    Returns:
        The compiled Sequential model.
    """
    # create model
    model = Sequential()
    model.add(Flatten(input_shape=(28,28,1)))
    # No input_dim here: the input shape is already fixed by the Flatten layer above.
    model.add(Dense(num_pixels, kernel_initializer='normal', activation='relu'))
    model.add(Dense(num_classes, kernel_initializer='normal', activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

Universal Sentence Encoder Error: Input 0 is incompatible with layer conv1d_6: expected ndim=3, found ndim=2

I'm working on a sentiment analysis task using the Universal Sentence Encoder (embed_size=512) with a CNN, but I get an error that says: Input 0 is incompatible with layer conv1d_6: expected ndim=3, found ndim=2.
I would also like to know whether it is right to combine the Universal Sentence Encoder with a CNN in this way or not.
# Load the pickled texts; the file is closed as soon as it has been read.
with open("X.pickle", "rb") as pickle_in:
    X = pickle.load(pickle_in)
# Convert X to a list: the embedding code works once the pandas Series
# data type has been converted to a list.
X = X.tolist()
# Add a trailing axis so that every sample is a one-element row.
X = np.array(X, dtype=object)[:, np.newaxis]
# Load the pickled labels and one-hot encode them.
with open("Y.pickle", "rb") as pickle_in:
    Y = pickle.load(pickle_in)
Y = np.asarray(pd.get_dummies(Y), dtype = np.int8)
import tensorflow as tf
import tensorflow_hub as hub
module_url = "https://tfhub.dev/google/universal-sentence-encoder-large/3"
embed = hub.Module(module_url)
# Hold out 15% for testing, then 15% of the remainder for validation.
X_train, X_test, Y_train, Y_test = train_test_split(X,Y, test_size = 0.15,
                                                    random_state = 42)
X_train, X_Val, Y_train, Y_Val = train_test_split(X_train,Y_train, test_size
                                                  = 0.15, random_state = 42)
print(X_train.shape,Y_train.shape)
print(X_test.shape,Y_test.shape)
print(X_Val.shape,Y_Val.shape)
type(Y_test)
# Width of the encoder output, read from the module's output signature.
embed_size = embed.get_output_info_dict()['default'].get_shape()[1].value
def UniversalEmbedding(x):
    """Embed a tensor of strings with the module-level ``embed`` module.

    Args:
        x: Input tensor; it is cast to ``tf.string`` and squeezed before
            being passed to the encoder.

    Returns:
        The ``"default"`` output of the encoder for the given input.
    """
    return embed(tf.squeeze(tf.cast(x, tf.string)),
                 signature="default", as_dict=True)["default"]
import keras
seed=7
np.random.seed(seed)
# The import list is parenthesized so it can span lines, and it now includes
# Conv1D and Dropout, which are used below.
from keras.layers import (Input, Dense, concatenate, Activation,
                          GlobalMaxPooling1D, Conv1D, Dropout)
from keras import layers
from keras.models import Model
# One string per sample goes in; the Lambda layer maps it to a sentence embedding.
input_text = layers.Input(shape=(1,), dtype=tf.string)
embedding = layers.Lambda(UniversalEmbedding,
                          output_shape=(embed_size,))(input_text)
# NOTE(review): the embedding is declared with output_shape=(embed_size,), i.e.
# without a sequence axis, yet it is fed to Conv1D layers below. This matches
# the reported "expected ndim=3, found ndim=2" error and is left as posted.
bigram_branch = Conv1D(filters=64, kernel_size=1, padding='same',
                       activation='relu', strides=1)(embedding)
bigram_branch = GlobalMaxPooling1D()(bigram_branch)
trigram_branch = Conv1D(filters=64, kernel_size=2, padding='same',
                        activation='relu', strides=1)(embedding)
trigram_branch = GlobalMaxPooling1D()(trigram_branch)
fourgram_branch = Conv1D(filters=64, kernel_size=3, padding='same',
                         activation='relu', strides=1)(embedding)
fourgram_branch = GlobalMaxPooling1D()(fourgram_branch)
# Concatenate the three pooled branches and classify.
merged = concatenate([bigram_branch, trigram_branch, fourgram_branch],
                     axis=1)
merged = Dense(512, activation='relu')(merged)
merged = Dropout(0.8)(merged)
merged = Dense(2)(merged)
output = Activation('sigmoid')(merged)
# The model input is the input_text tensor defined above (the original
# referenced an undefined name, tweet_input).
model = Model(inputs=[input_text], outputs=[output])
adam=keras.optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=None,
                           decay=0.0, amsgrad=False)
model.compile(loss='mean_squared_error',
              optimizer= adam,
              metrics=['accuracy'])
model.summary()
You cannot directly pass the output of the Universal Sentence Encoder to Conv1D, because Conv1D expects a tensor with shape [batch, sequence, feature] while the output of the Universal Sentence Encoder is [batch, feature]. This is also stated on tfhub.dev:
The input is variable length English text and the output is a 512
dimensional vector.
How can I fix this?
In my view, the easiest possible solution is to use ELMo from TensorFlow Hub. With ELMo you can map each sentence to [batch, sequence, feature] and then feed it into the Conv1D.

Keras ValueError: expected dense_1 to have shape (3,) but got array with shape (4,)

I've been trying to figure out how to use Keras when reading data from a .csv[1]. I have the following code:
# Load the iris CSV (no header row): the first four columns are the features,
# the fifth column is the class label.
dataframe = pd.read_csv("iris.csv", header=None)
dataset = dataframe.values
X = dataset[:, 0:4].astype(float)
Y = dataset[:, 4]
# encode class values as integers
encoder = LabelEncoder()
encoder.fit(Y)
encoded_Y = encoder.transform(Y)
# convert integers to dummy variables (i.e. one hot encoded)
one_hot_y = keras.utils.to_categorical(encoded_Y)
# NOTE(review): the train slice [:100] and the test slice [50:] overlap on
# rows 50-99, and the data is sliced without shuffling.
X_train = X[:100]
X_test = X[50:]
Y_train = one_hot_y[:100]
Y_test = one_hot_y[50:]
print(X_train.shape)
# define baseline model
def baseline_model():
    """Build and compile a small softmax classifier.

    The network takes 4 input features, has one ReLU hidden layer with
    8 units and a 3-unit softmax output. It is compiled with categorical
    cross-entropy loss, the Adam optimizer and the accuracy metric.

    Returns:
        The compiled Sequential model.
    """
    # create model
    model = keras.models.Sequential()
    model.add(keras.layers.Dense(8, input_shape=(4, ), activation='relu'))
    model.add(keras.layers.Dense(3, activation='softmax'))
    # Compile model
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model
# Build, train and evaluate the baseline model.
model = baseline_model()
# NOTE(review): X_test (4 feature columns) is passed where fit() takes the
# targets, instead of Y_train; this matches the "got array with shape (4,)"
# in the reported error.
history = model.fit(X_train, X_test, epochs=5)
# NOTE(review): evaluate() is given the two one-hot label slices
# (Y_train, Y_test) rather than X_test and Y_test.
test_loss, test_acc = model.evaluate(Y_train, Y_test)
print('Test accuracy:', test_acc)
However, when I run this, I get the error:
ValueError: Error when checking target: expected dense_1 to have shape (3,) but got array with shape (4,)
I find this odd as I was sure to set input_shape=(4, ). Any help with this would be appreciated.
[1] The CSV looks as follows:
5.1,3.5,1.4,0.2,Iris-setosa
...
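You have only 3 output neurons, but the target array you are passing has 4 columns. Note that model.fit(X_train, X_test, epochs=5) passes X_test (the 4 feature columns) as the target instead of Y_train, which on its own produces this error; the calls should be model.fit(X_train, Y_train, epochs=5) and model.evaluate(X_test, Y_test). Only if your data really has 4 classes do you need to change this line: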
model.add(keras.layers.Dense(3, activation='softmax'))
from 3 output classes to 4 output classes:
model.add(keras.layers.Dense(4, activation='softmax'))

Error in last layer of neural network

#10-Fold split
seed = 7
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
np.random.seed(seed)
cvscores = []
act = 'relu'
# Build and train a fresh model for every train/test fold.
for train, test in kfold.split(X, Y):
    model = Sequential()
    model.add(Dense(43, input_shape=(8,)))
    model.add(Activation(act))
    model.add(Dense(500))
    model.add(Activation(act))
    #model.add(Dropout(0.4))
    model.add(Dense(1000))
    model.add(Activation(act))
    #model.add(Dropout(0.4))
    model.add(Dense(1500))
    model.add(Activation(act))
    #model.add(Dropout(0.4))
    # NOTE(review): the output layer has 2 softmax units, while the reported
    # error says the targets have shape (3869, 1); Y is passed to fit() as-is.
    model.add(Dense(2))
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    # The held-out fold is used as validation data.
    hist = model.fit(X[train], Y[train],
                     epochs=500,
                     shuffle=True,
                     batch_size=100,
                     validation_data=(X[test], Y[test]), verbose=2)
    #model.summary()
When I call model.fit it reports the following error :
ValueError: Error when checking target: expected activation_5 to have shape (None, 2) but got array with shape (3869, 1)
I am using keras with TensorFlow backend. Please ask for any further clarification if needed.
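The problem was solved when we one-hot encoded the labels with this statement, so that the targets have the shape (None, 2) that the 2-unit softmax output expects: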
y = to_categorical(Y[:])

Resources