Error in reshaping input tokenized text predicting the sentiments in a lstm rnn - keras
I am new to neural networks and have been learning their application in the field of text analytics, so I have used an LSTM RNN for this application in Python.
After training the model on a dataset of dimension 20,000*1 (20,000 being the texts and 1 being the sentiment of each text) I got a good accuracy of 99%, after which I validated the model, which was working fine (using the model.predict() function).
Now, just to test my model, I have been giving it random text inputs, either from a dataframe or from variables containing some text, but I always end up with an array-reshaping error, where it is required that the input to the RNN model be of dimension (1,30).
But when i re-input the training data into the model for prediction , the model works absolutely fine , why is this happening?
link for the screenshot of error
link for image of model summary
training data
I am just stuck here and any kind of suggestion will help me learning more about rnn, i am attaching the error and the rnn model code with this request.
Thank You
Regards
Tushar Upadhyay
import numpy as np
import pandas as pd
import keras
import sklearn
from sklearn.feature_extraction.text import CountVectorizer
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Dense, Embedding, LSTM, SpatialDropout1D
from sklearn.model_selection import train_test_split
from keras.utils.np_utils import to_categorical
import re

# Load the labelled tweets (tweet text + sentiment columns).
data = pd.read_csv('..../twitter_tushar_data.csv')

# Tokenize the tweet text, keeping only the 4000 most frequent words.
max_features = 4000
tokenizer = Tokenizer(num_words=max_features, split=' ')
tokenizer.fit_on_texts(data['tweetText'].values)
X = tokenizer.texts_to_sequences(data['tweetText'].values)
X = pad_sequences(X)  # pad every sequence to the longest tweet's length

embed_dim = 128
lstm_out = 196

model = Sequential()
model.add(Embedding(max_features, embed_dim, input_length=X.shape[1]))
# BUG FIX: the original built SpatialDropout1D(0.2) on its own line but
# never added it to the model, so it had no effect; it belongs directly
# after the Embedding layer (used to avoid overfitting).
model.add(SpatialDropout1D(0.2))
model.add(LSTM(lstm_out, recurrent_dropout=0.2, dropout=0.2))
model.add(Dense(2, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam',
              metrics=['accuracy'])
print(model.summary())

# One-hot encode the sentiment labels and split into training/testing parts.
Y = pd.get_dummies(data['SA']).values
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.30, random_state=42)
print(X_train.shape, Y_train.shape)
print(X_test.shape, Y_test.shape)

batch_size = 128
model.fit(X_train, Y_train, epochs=7, batch_size=batch_size, verbose=2)

# Hold the last 3500 test rows out as a manual validation set.
validation_size = 3500
X_validate = X_test[-validation_size:]
Y_validate = Y_test[-validation_size:]
X_test = X_test[:-validation_size]
Y_test = Y_test[:-validation_size]

score, acc = model.evaluate(X_test, Y_test, verbose=2, batch_size=128)
print("score: %.2f" % score)
print("acc: %.2f" % acc)

# Per-class accuracy on the held-out validation rows.
pos_cnt, neg_cnt, pos_correct, neg_correct = 0, 0, 0, 0
for i in range(len(X_validate)):
    result = model.predict(X_validate[i].reshape(1, X_test.shape[1]),
                           batch_size=1, verbose=2)[0]
    if np.argmax(result) == np.argmax(Y_validate[i]):
        if np.argmax(Y_validate[i]) == 0:
            neg_correct += 1
        else:
            pos_correct += 1
    if np.argmax(Y_validate[i]) == 0:
        neg_cnt += 1
    else:
        pos_cnt += 1

# Guard against division by zero if a class is absent from the sample.
if pos_cnt:
    print("pos_acc", pos_correct / pos_cnt * 100, "%")
if neg_cnt:
    print("neg_acc", neg_correct / neg_cnt * 100, "%")
I got the solution to my question: it was just a matter of tokenizing the input properly. Thanks!! The code below is for prediction on different user inputs.
# Predict the sentiment of arbitrary user-supplied text.
text = np.array(['you are a pathetic awful movie'])
print(text.shape)
# BUG FIX: the original fitted a *new* Tokenizer on the single input
# sentence, which assigns word indices unrelated to the training
# vocabulary, so the model would see garbage. It also referenced
# `sequence.pad_sequences` (never imported; `pad_sequences` was imported
# directly) and an undefined `max_review_length`. Reuse the tokenizer
# fitted on the training corpus and pad to the training sequence length.
padded = pad_sequences(tokenizer.texts_to_sequences(text), maxlen=X.shape[1])
prediction = model.predict(padded)
print(prediction)
print(np.argmax(prediction))
Related
Unable to calculate Model performance for Decision Tree Regressor
Although my code run fine on repl and did giving me results but it miserably fails on the Katacoda testing environment. I am attaching the repl file here for your review as well, which also contains the question which is commented just above the code I have written. Kindly review and let me know what mistakes I am making here. Repl Link https://repl.it/repls/WarmRobustOolanguage Also sharing code below Commented is Question Instructions #Import two modules sklearn.datasets, and #sklearn.model_selection. #Import numpy and set random seed to 100. #Load popular Boston dataset from sklearn.datasets module #and assign it to variable boston. #Split boston.data into two sets names X_train and X_test. #Also, split boston.target into two sets Y_train and Y_test. #Hint: Use train_test_split method from #sklearn.model_selection; set random_state to 30. #Print the shape of X_train dataset. #Print the shape of X_test dataset. import sklearn.datasets as datasets from sklearn.model_selection import train_test_split from sklearn.tree import DecisionTreeRegressor from sklearn.model_selection import RandomizedSearchCV from sklearn.model_selection import cross_val_score import numpy as np np.random.seed(100) max_depth = range(2, 6) boston = datasets.load_boston() X_train, X_test, Y_train, Y_test = train_test_split(boston.data, boston.target, random_state=30) print(X_train.shape) print(X_test.shape) #Import required module from sklearn.tree. #Build a Decision tree Regressor model from X_train set and #Y_train labels, with default parameters. Name the model as #dt_reg. #Evaluate the model accuracy on training data set and print #it's score. #Evaluate the model accuracy on testing data set and print it's score. 
#Predict the housing price for first two samples of X_test
#set and print them. (Hint: Use predict() function)
# NOTE(review): the extraction collapsed this script onto one physical
# line; restored to valid Python formatting, logic unchanged.
dt_reg = DecisionTreeRegressor(random_state=1)
dt_reg = dt_reg.fit(X_train, Y_train)
print('Accuracy of Train Data :', cross_val_score(dt_reg, X_train, Y_train, cv=10))
print('Accuracy of Test Data :', cross_val_score(dt_reg, X_test, Y_test, cv=10))
predicted = dt_reg.predict(X_test[:2])
print(predicted)

#Fit multiple Decision tree regressors on X_train data and
#Y_train labels with max_depth parameter value changing from
#2 to 5.
#Evaluate each model accuracy on testing data set.
#Hint: Make use of for loop
#Print the max_depth value of the model with highest accuracy.
dt_reg = DecisionTreeRegressor()
random_grid = {'max_depth': max_depth}
dt_random = RandomizedSearchCV(estimator=dt_reg,
                               param_distributions=random_grid,
                               n_iter=90, cv=3, verbose=2,
                               random_state=42, n_jobs=-1)
dt_random.fit(X_train, Y_train)
dt_random.best_params_


def evaluate(model, test_features, test_labels):
    """Print and return the MAPE-based accuracy of *model* on the test set."""
    predictions = model.predict(test_features)
    errors = abs(predictions - test_labels)
    mape = 100 * np.mean(errors / test_labels)
    accuracy = 100 - mape
    print('Model Performance')
    print('Average Error: {:0.4f} degrees.'.format(np.mean(errors)))
    print('Accuracy = {:0.2f}%.'.format(accuracy))
    return accuracy


best_random = dt_random.best_estimator_
random_accuracy = evaluate(best_random, X_test, Y_test)
print("Accuracy Scores of the Model ", random_accuracy)
best_parameters = dt_random.best_params_['max_depth']
print(best_parameters)
The question is asking for default values. Try to remove random_state=1 Current Line: dt_reg = DecisionTreeRegressor(random_state=1) Update Line: dt_reg = DecisionTreeRegressor() I think it should Work!!!
# ============================================================
# Machine Learning Using Scikit-Learn | 3 | Decision Trees
# ============================================================
# NOTE(review): the extraction collapsed this script onto one physical
# line; restored to valid Python formatting, logic unchanged.
import sklearn.datasets as datasets
import sklearn.model_selection as model_selection
import numpy as np
from sklearn.tree import DecisionTreeRegressor

np.random.seed(100)

# Load popular Boston dataset from sklearn.datasets module and assign it
# to variable boston.
boston = datasets.load_boston()
# print(boston)

# Split boston.data into X_train/X_test and boston.target into
# Y_train/Y_test, with random_state set to 30.
X_train, X_test, Y_train, Y_test = model_selection.train_test_split(
    boston.data, boston.target, random_state=30)

# Print the shapes of the train and test feature sets.
print(X_train.shape)
print(X_test.shape)

# Build a Decision tree Regressor from X_train/Y_train with default
# parameters, and score it on the train and test sets.
dt_Regressor = DecisionTreeRegressor()
dt_reg = dt_Regressor.fit(X_train, Y_train)
print(dt_reg.score(X_train, Y_train))
print(dt_reg.score(X_test, Y_test))
predicted = dt_reg.predict(X_test[:2])
print(predicted)

# Find the max_depth in [2, 5] whose model scores highest on the test set.
maxdepth = 2
maxscore = 0
for x in range(2, 6):
    dt_Regressor = DecisionTreeRegressor(max_depth=x)
    dt_reg = dt_Regressor.fit(X_train, Y_train)
    score = dt_reg.score(X_test, Y_test)
    if maxscore < score:
        maxdepth = x
        maxscore = score
print(maxdepth)
Implementing and tuning a simple CNN for 3D data using Keras Conv3D
I'm trying to implement a 3D CNN using Keras. However, I am having some difficulties understanding some details in the results obtained and further enhancing the accuracy. The data that I am trying to analyzing have the shape {64(d1)x64(d2)x38(d3)}, where d1 and d2 are the length and width of the image (64x64 pixels) and d3 is the time dimension. In other words, I have 38 images. The channel parameter is set to 1 as my data are actually raw data and not really colorful images. My data consist of 219 samples, hence 219x64x64x38. They are divided into training and validation sets with 20% for validation. In addition, I have a fixed 147 additional data for testing. Below is my code that works fine. It creates a txt file that saves the results for the different combination of parameters in my network (grid search). Here in this code, I only consider tuning 2 parameters: the number of filters and lambda for L2 regularizer. I fixed the dropout and the kernel size for the filters. However, later I considered their variations. I also tried to set the seed value so that I have some sort of reproducibility (I don't think that I have achieved this task). My question is that: Given the below architecture and code, I always reach for all the given combinations of parameters a convergence for the training accuracy towards 1 (which is good). However, for the validation accuracy it is most of the time around 80% +/- 4% (rarely below 70%) despite the hyper-parameters combination. Similar behavior for the test accuracy. How can I enhance this accuracy to above 90% ? As far as I know, having a gap between the train and validation/test accuracy is a result from overfitting. However, in my model I am adding dropouts and L2 regularizers and also changing the size of my network which should somehow reduce this gap (but it is not). Is there anything else I can do besides modifying my input data? Does adding more layers help? 
Or is there maybe a pre-trained 3D CNN like in the case of 2D CNN (e.g., AlexNet)? Should I try ConvLSTM? Is this the limit of this architecture? Thank you :) import numpy as np import tensorflow as tf import keras from keras.models import Sequential from keras.layers import Conv3D, MaxPooling3D, Dense, Flatten, Activation from keras.utils import to_categorical from keras.regularizers import l2 from keras.layers import Dropout from keras.utils import multi_gpu_model import scipy.io as sio from sklearn.metrics import accuracy_score from sklearn.metrics import f1_score from keras.callbacks import ReduceLROnPlateau tf.set_random_seed(1234) def normalize_minmax(X_train): """ Normalize to [0,1] """ from sklearn import preprocessing min_max_scaler = preprocessing.MinMaxScaler() X_minmax_train = min_max_scaler.fit_transform(X_train) return X_minmax_train # generate and prepare the dataset def get_data(): # Load and prepare the data X_data = sio.loadmat('./X_train')['X_train'] Y_data = sio.loadmat('./Y_train')['targets_train'] X_test = sio.loadmat('./X_test')['X_test'] Y_test = sio.loadmat('./Y_test')['targets_test'] return X_data, Y_data, X_test, Y_test def get_model(X_train, Y_train, X_validation, Y_validation, F1_nb, F2_nb, F3_nb, kernel_size_1, kernel_size_2, kernel_size_3, l2_lambda, learning_rate, reduce_lr, dropout_conv1, dropout_conv2, dropout_conv3, dropout_dense, no_epochs): no_classes = 5 sample_shape = (64, 64, 38, 1) batch_size = 32 dropout_seed = 30 conv_seed = 20 # Create the model model = Sequential() model.add(Conv3D(F1_nb, kernel_size=kernel_size_1, kernel_regularizer=l2(l2_lambda), padding='same', kernel_initializer='glorot_uniform', input_shape=sample_shape)) model.add(Activation('selu')) model.add(MaxPooling3D(pool_size=(2,2,2))) model.add(Dropout(dropout_conv1, seed=conv_seed)) model.add(Conv3D(F2_nb, kernel_size=kernel_size_2, kernel_regularizer=l2(l2_lambda), padding='same', kernel_initializer='glorot_uniform')) model.add(Activation('selu')) 
model.add(MaxPooling3D(pool_size=(2,2,2))) model.add(Dropout(dropout_conv2, seed=conv_seed)) model.add(Conv3D(F3_nb, kernel_size=kernel_size_3, kernel_regularizer=l2(l2_lambda), padding='same', kernel_initializer='glorot_uniform')) model.add(Activation('selu')) model.add(MaxPooling3D(pool_size=(2,2,2))) model.add(Dropout(dropout_conv3, seed=conv_seed)) model.add(Flatten()) model.add(Dense(512, kernel_regularizer=l2(l2_lambda), kernel_initializer='glorot_uniform')) model.add(Activation('selu')) model.add(Dropout(dropout_dense, seed=dropout_seed)) model.add(Dense(no_classes, activation='softmax')) model = multi_gpu_model(model, gpus = 2) # Compile the model model.compile(loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.Adam(lr=learning_rate), metrics=['accuracy']) # Train the model. history = model.fit(X_train, Y_train, batch_size=batch_size, epochs=no_epochs, validation_data=(X_validation, Y_validation),callbacks=[reduce_lr]) return model, history reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=0.0001) learning_rate = 0.001 no_epochs = 100 X_data, Y_data, X_test, Y_test = get_data() # Normalize the train/val data for i in range(X_data.shape[0]): for j in range(X_data.shape[3]): X_data[i,:,:,j] = normalize_minmax(X_data[i,:,:,j]) X_data = np.expand_dims(X_data, axis=4) # Normalize the test data for i in range(X_test.shape[0]): for j in range(X_test.shape[3]): X_test[i,:,:,j] = normalize_minmax(X_test[i,:,:,j]) X_test = np.expand_dims(X_test, axis=4) # Shuffle the training data # fix random seed for reproducibility seedValue = 40 permutation = np.random.RandomState(seed=seedValue).permutation(len(X_data)) X_data = X_data[permutation] Y_data = Y_data[permutation] Y_data = np.squeeze(Y_data) Y_test = np.squeeze(Y_test) #Split between train and validation (20%). Here I did not use the classical validation_split=0.2 just to make sure that the data is the same for the different architectures I am using. 
X_train = X_data[0:175,:,:,:,:] Y_train = Y_data[0:175] X_validation = X_data[176:,:,:,:] Y_validation = Y_data[176:] Y_train = to_categorical(Y_train,num_classes=5).astype(np.integer) Y_validation = to_categorical(Y_validation,num_classes=5).astype(np.integer) Y_test = to_categorical(Y_test,num_classes=5).astype(np.integer) l2_lambda_list = [(1*pow(10,-4)),(2*pow(10,-4)), (3*pow(10,-4)), (4*pow(10,-4)), (5*pow(10,-4)),(6*pow(10,-4)), (7*pow(10,-4)), (8*pow(10,-4)),(9*pow(10,-4)),(10*pow(10,-4)) ] filters_nb = [(128,64,64),(128,64,32),(128,64,16),(128,64,8),(128,32,32),(128,32,16),(128,32,8),(128,16,8),(128,8,8), (64,64,32),(64,64,16),(64,64,8),(64,32,32),(64,32,16),(64,32,8),(64,16,16),(64,16,8),(64,8,8), (32,32,16),(32,32,8),(32,16,16),(32,16,8),(32,8,8), (16,16,16),(16,16,8),(16,8,8) ] DropOut = [(0.25,0.25,0.25,0.5), (0,0,0,0.1),(0,0,0,0.2),(0,0,0,0.3),(0,0,0,0.4),(0,0,0,0.5), (0.1,0.1,0.1,0),(0.2,0.2,0.2,0),(0.3,0.3,0.3,0),(0.4,0.4,0.4,0),(0.5,0.5,0.5,0), (0.1,0.1,0.1,0.1),(0.1,0.1,0.1,0.2),(0.1,0.1,0.1,0.3),(0.1,0.1,0.1,0.4),(0.1,0.1,0.1,0.5), (0.15,0.15,0.15,0.1),(0.15,0.15,0.15,0.2),(0.15,0.15,0.15,0.3),(0.15,0.15,0.15,0.4),(0.15,0.15,0.15,0.5), (0.2,0.2,0.2,0.1),(0.2,0.2,0.2,0.2),(0.2,0.2,0.2,0.3),(0.2,0.2,0.2,0.4),(0.2,0.2,0.2,0.5), (0.25,0.25,0.25,0.1),(0.25,0.25,0.25,0.2),(0.25,0.25,0.25,0.3),(0.25,0.25,0.25,0.4),(0.25,0.25,0.25,0.5), (0.3,0.3,0.3,0.1),(0.3,0.3,0.3,0.2),(0.3,0.3,0.3,0.3),(0.3,0.3,0.3,0.4),(0.3,0.3,0.3,0.5), (0.35,0.35,0.35,0.1),(0.35,0.35,0.35,0.2),(0.35,0.35,0.35,0.3),(0.35,0.35,0.35,0.4),(0.35,0.35,0.35,0.5) ] kernel_size = [(3,3,3), (2,3,3),(2,3,4),(2,3,5),(2,3,6),(2,3,7),(2,3,8),(2,3,9),(2,3,10),(2,3,11),(2,3,12),(2,3,13),(2,3,14),(2,3,15), (3,3,3),(3,3,4),(3,3,5),(3,3,6),(3,3,7),(3,3,8),(3,3,9),(3,3,10),(3,3,11),(3,3,12),(3,3,13),(3,3,14),(3,3,15), (3,4,3),(3,4,4),(3,4,5),(3,4,6),(3,4,7),(3,4,8),(3,4,9),(3,4,10),(3,4,11),(3,4,12),(3,4,13),(3,4,14),(3,4,15), ] for l in range(len(l2_lambda_list)): l2_lambda = l2_lambda_list[l] f = 
open("My Results.txt", "a") lambda_Str = str(l2_lambda) f.write("---------------------------------------\n") f.write("lambda = "+f"{lambda_Str}\n") f.write("---------------------------------------\n") for i in range(len(filters_nb)): F1_nb = filters_nb[i][0] F2_nb = filters_nb[i][1] F3_nb = filters_nb[i][2] kernel_size_1 = kernel_size[0] kernel_size_2 = kernel_size_1 kernel_size_3 = kernel_size_1 dropout_conv1 = DropOut[0][0] dropout_conv2 = DropOut[0][1] dropout_conv3 = DropOut[0][2] dropout_dense = DropOut[0][3] # fit model model, history = get_model(X_train, Y_train, X_validation, Y_validation, F1_nb, F2_nb, F3_nb, kernel_size_1, kernel_size_2, kernel_size_3, l2_lambda, learning_rate, reduce_lr, dropout_conv1, dropout_conv2, dropout_conv3, dropout_dense, no_epochs) # Evaluate metrics predictions = model.predict(X_test) out = np.argmax(predictions, axis=1) Y_test = sio.loadmat('./Y_test')['targets_test'] Y_test = np.squeeze(Y_test) loss = history.history['loss'][no_epochs-1] acc = history.history['acc'][no_epochs-1] val_loss = history.history['val_loss'][no_epochs-1] val_acc = history.history['val_acc'][no_epochs-1] # accuracy: (tp + tn) / (p + n) accuracy = accuracy_score(Y_test, out) # f1: 2 tp / (2 tp + fp + fn) f1 = f1_score(Y_test, out,average='macro') a = str(filters_nb[i][0]) + ',' + str(filters_nb[i][1]) + ',' + str(filters_nb[i][2]) + ': ' + str('f1-metric: ') + str('%f' % f1) + str(' | loss: ') + str('%f' % loss) + str(' | acc: ') + str('%f' % acc) + str(' | val_loss: ') + str('%f' % val_loss) + str(' | val_acc: ') + str('%f' % val_acc) + str(' | test_acc: ') + str('%f' % accuracy) f.write(f"{a}\n") f.close()
Keras Multiclass Classification (Dense model) - Confusion Matrix Incorrect
I have a labeled dataset. last column (78) contains 4 types of attack. following codes confusion matrix is correct for two types of attack. can any one help to modify the code for keras multiclass attack detection and correction for get correct confusion matrix? and for correct code for precision, FPR,TPR for multiclass. Thanks. import pandas as pd from sklearn.preprocessing import LabelEncoder, StandardScaler from sklearn.model_selection import train_test_split from sklearn.model_selection import GridSearchCV from tensorflow.keras.wrappers.scikit_learn import KerasClassifier from tensorflow.keras.models import Sequential, load_model from tensorflow.keras.layers import Dense from sklearn.metrics import confusion_matrix import matplotlib.pyplot as plt import seaborn as sns from keras.utils.np_utils import to_categorical dataset_original = pd.read_csv('./XYZ.csv') # Dron NaN value from Data Frame dataset = dataset_original.dropna() # data cleansing X = dataset.iloc[:, 0:78] print(X.info()) print(type(X)) y = dataset.iloc[:, 78] #78 is labeled column contains 4 anomaly type print(y) # encode the labels to 0, 1 respectively print(y[100:110]) encoder = LabelEncoder() y = encoder.fit_transform(y) print([y[100:110]]) # Split the dataset now XTrain, XTest, yTrain, yTest = train_test_split(X, y, test_size=0.2, random_state=0) # feature scaling scalar = StandardScaler() XTrain = scalar.fit_transform(XTrain) XTest = scalar.transform(XTest) # modeling model = Sequential() model.add(Dense(units=16, kernel_initializer='uniform', activation='relu', input_dim=78)) model.add(Dense(units=8, kernel_initializer='uniform', activation='relu')) model.add(Dense(units=6, kernel_initializer='uniform', activation='relu')) model.add(Dense(units=1, kernel_initializer='uniform', activation='sigmoid')) model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy']) model.fit(XTrain, yTrain, batch_size=1000, epochs=10) history = model.fit(XTrain, yTrain, batch_size=1000, 
epochs=10, verbose=1, validation_data=(XTest, yTest)) yPred = model.predict(XTest) yPred = [1 if y > 0.5 else 0 for y in yPred] matrix = confusion_matrix(yTest, yPred)`enter code here` print(matrix) accuracy = (matrix[0][0] + matrix[1][1]) / (matrix[0][0] + matrix[0][1] + matrix[1][0] + matrix[1][1]) print("Accuracy: " + str(accuracy * 100) + "%")
If i understand correctly, you are trying to solve a multiclass classification problem where your target label belongs to 4 different attacks. Therefore, you should use the output Dense layer having 4 units instead of 1 with a 'softmax' activation function (not 'sigmoid' activation). Additionally, you should use 'categorical_crossentropy' loss in place of 'binary_crossentropy' while compiling your model. Furthermore, with this setting, applying argmax on prediction result (that has 4 class probability values for each test sample) you will get the final label/class. [Edit] Your confusion matrix and high accuracy indicates that you are working with an imbalanced dataset. May be very high number of samples are from class 0 and few samples are from the remaining 3 classes. To handle this you may want to apply weighting samples or over-sampling/under-sampling approaches.
Model trained using LSTM is predicting only same value for all
I have a dataset with 4000 rows and two columns. The first column contains some sentences and the second column contains some numbers for it. There are some 4000 sentences and they are categorized by some 100 different numbers. For example: Sentences Codes Google headquarters is in California 87390 Steve Jobs was a great man 70214 Steve Jobs has done great technology innovations 70214 Google pixel is a very nice phone 87390 Microsoft is another great giant in technology 67012 Bill Gates founded Microsoft 67012 Similarly, there are a total of 4000 rows containing these sentences and these rows are classified with 100 such codes I have tried the below code but when I am predicting, it is predicting one same value for all. IN othr words y_pred is giving an array of same values. May I know where is the code going wrong import pandas as pd import numpy as np xl = pd.ExcelFile("dataSet.xlsx") df = xl.parse('Sheet1') #df = df.sample(frac=1).reset_index(drop=True)# shuffling the dataframe df = df.sample(frac=1).reset_index(drop=True)# shuffling the dataframe X = df.iloc[:, 0].values Y = df.iloc[:, 1].values from sklearn.feature_extraction.text import CountVectorizer from sklearn.feature_extraction.text import TfidfTransformer import pickle count_vect = CountVectorizer() X = count_vect.fit_transform(X) tfidf_transformer = TfidfTransformer() X = tfidf_transformer.fit_transform(X) X = X.toarray() from sklearn.preprocessing import LabelEncoder, OneHotEncoder labelencoder_Y = LabelEncoder() Y = labelencoder_Y.fit_transform(Y) y = Y.reshape(-1, 1) # Because Y has only one column onehotencoder = OneHotEncoder(categories='auto') Y = onehotencoder.fit_transform(y).toarray() from sklearn.model_selection import train_test_split X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=0) inputDataLength = len(X_test[0]) outputDataLength = len(Y[0]) from keras.models import Sequential from keras.layers import Dense from keras.layers import LSTM from 
keras.layers.embeddings import Embedding from keras.preprocessing import sequence from keras.layers import Dropout # fitting the model embedding_vector_length = 100 model = Sequential() model.add(Embedding(outputDataLength,embedding_vector_length, input_length=inputDataLength)) model.add(Dropout(0.2)) model.add(LSTM(outputDataLength)) model.add(Dense(outputDataLength, activation='softmax')) model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy']) print(model.summary()) model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=10, batch_size=20) y_pred = model.predict(X_test) invorg = model.inverse_transform(y_test) y_test = labelencoder_Y.inverse_transform(invorg) inv = onehotencoder.inverse_transform(y_pred) y_pred = labelencoder_Y.inverse_transform(inv)
You are using binary_crossentropy eventhough you have 100 classes. Which is not the right thing to do. You have to use categorical_crossentropy for this task. Compile your model like this, model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy']) Also, you are predicting with the model and converting to class labels like this, y_pred = model.predict(X_test) inv = onehotencoder.inverse_transform(y_pred) y_pred = labelencoder_Y.inverse_transform(inv) Since your model is activated with softmax inorder to get the class label, you have to find the argmax of the predictions. For example, if the prediction was [0.2, 0.3, 0.0005, 0.99] you have to take argmax, which will give you output 3. The class that have high probability. So you have to modify the prediction code like this, y_pred = model.predict(X_test) y_pred = np.argmax(y_pred, axis=1) y_pred = labelencoder_Y.inverse_transform(y_pred) invorg = np.argmax(y_test, axis=1) invorg = labelencoder_Y.inverse_transform(invorg) Now you will have the actual class labels in invorg and predicted class labels at y_pred
Changing batch_size parameter in keras leads to broadcast error
I am running a simple encoder-decoder setup to train a representation for a one dimensional image. In this sample the input are lines with varying slopes and in the encoded layer we would expect something that resembles the slope. My setup is keras with a tensorflow backend. I am very new to this as well. It all works fine, at least until I move away from steps_per_epoch to batch_size in the model.fit() method. Certain values of the batch_size, such as 1,2,3, 8 and 16 do work, for others I get a value error. My initial guess was 2^n, but that did not work. The error I get for batch_size = 5 ValueError: operands could not be broadcast together with shapes (5,50) (3,50) (5,50) I am trying to understand which relation between batch_size and training data is valid such that it always passes. I assumed that the training set would be simply divided into floor(N/batch_size) batches and the remainder would be processed as such. My questions are: What is the relation between size of data set and batch_size that are allowed. What exactly is the keras/tensorflow trying to do such that the batch_size is important? Thank you very much for the help. 
The code to reproduce this is import numpy as np from keras.models import Model from keras.layers import Input, Dense, Conv1D, Concatenate from keras.losses import mse from keras.optimizers import Adam INPUT_DIM = 50 INTER_DIM = 15 LATENT_DIM = 1 # Prepare Sample Data one_line = np.linspace(1, 30, INPUT_DIM).reshape(1, INPUT_DIM) test_array = np.repeat(one_line, 1000, axis=0) slopes = np.linspace(0, 1, 1000).reshape(1000, 1) data = test_array * slopes # Train test split train_mask = np.where(np.random.sample(1000) < 0.8, 1, 0).astype('bool') x_train = data[train_mask].reshape(-1, INPUT_DIM, 1) x_test = data[~train_mask].reshape(-1, INPUT_DIM, 1) # Define Model input = Input(shape=(INPUT_DIM, 1), name='input') conv_layer_small = Conv1D(filters=1, kernel_size=[3], padding='same')(input) conv_layer_medium = Conv1D(filters=1, kernel_size=[5], padding='same')(input) merged_convs = Concatenate()( [conv_layer_small, conv_layer_medium]) latent = Dense(LATENT_DIM, name='latent_layer', activation='relu')(merged_convs) encoder = Model(input, latent) decoder_int = Dense(INTER_DIM, name='dec_int_layer', activation='relu')(latent) output = Dense(INPUT_DIM, name='output', activation='linear')(decoder_int) encoder_decoder = Model(input, output, name='encoder_decoder') # Add Loss reconstruction_loss = mse(input, output) encoder_decoder.add_loss(reconstruction_loss) encoder_decoder.compile(optimizer='adam') if __name__ == '__main__': epochs = 100 encoder_decoder.fit( x_train, epochs=epochs, batch_size=4, verbose=2 )