My dataset has 13 attributes (12 inputs, the 13th is the output), and I want to calculate metrics for each row of the dataset, but it shows an error - keras

from numpy import loadtxt
from keras.models import Sequential
from keras.layers import Dense
from matplotlib import pyplot
from sklearn.preprocessing import MinMaxScaler
# load the dataset
dataset = loadtxt('train.csv', delimiter=',')
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(dataset)
normalized = scaler.transform(dataset)
for row in normalized:
    # split into input (X) and output (y) variables
    X = row[0:13]
    y = row[13]
    # define the keras model
    model = Sequential()
    model.add(Dense(12, input_dim=13, activation='relu'))
    model.add(Dense(16, activation='relu'))
    model.add(Dense(20, activation='relu'))
    model.add(Dense(16, activation='relu'))
    model.add(Dense(1, activation='relu'))
    # compile the keras model
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['mse', 'mae', 'mape'])
    history = model.fit(X, y, epochs=20, batch_size=1, verbose=2)
pyplot.plot(history.history['mse'])
pyplot.plot(history.history['mae'])
pyplot.plot(history.history['mape'])
pyplot.show()

As the error message indicates, the shape of the input X you pass to model.fit is not correct. The model expects an array of shape (13,), but it is receiving a (1,)-shaped array. Print the X and y values and verify whether they are being stored as lists of lists, as in X = [[1,2,3,4,...,13]] and y = [[0]]. In that case, pass X[0] and y[0] to model.fit.
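For instance, a minimal sketch of that check and fix, reusing the loop variables from the question's code (the reshape is one way to supply the batch dimension Keras expects; adapt it to whatever shapes your prints reveal):

import numpy as np

X = np.asarray(X).reshape(1, -1)  # add the batch dimension: shape (1, 13)
y = np.asarray(y).reshape(1,)     # one target per sample: shape (1,)
print(X.shape, y.shape)           # verify the shapes before fitting
history = model.fit(X, y, epochs=20, batch_size=1, verbose=2)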

Related

Is there any way to fit and apply a deep learning algorithm to chemical SMILES data in a Sequential model?

I have written code for this where my input X is a SMILES string,
X: c1ccccc1, and the Y value is water/methanol as the classification category.
# multi-class classification with Keras
import pandas
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from keras.utils import np_utils
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import LabelEncoder
from sklearn.pipeline import Pipeline
# load dataset
dataframe = pandas.read_csv("ADLV3_1.csv", header=None)
dataset = dataframe.values
X = dataset[:,2:3]
Y = dataset[:,3:4]
# encode class values as integers
encoder = LabelEncoder()
encoder.fit(Y)
encoded_Y = encoder.transform(Y)
# convert integers to dummy variables (i.e. one hot encoded)
dummy_y = np_utils.to_categorical(encoded_Y)
# define baseline model
def baseline_model():
    # create model
    model = Sequential()
    model.add(Dense(8, input_dim=4, activation='relu'))
    model.add(Dense(3, activation='softmax'))
    # Compile model
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model
estimator = KerasClassifier(build_fn=baseline_model, epochs=200, batch_size=5, verbose=0)
kfold = KFold(n_splits=10, shuffle=True)
results = cross_val_score(estimator, X, dummy_y, cv=kfold)
print("Baseline: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))
The code runs, but I get these warnings:
/usr/local/lib/python3.7/dist-packages/sklearn/preprocessing/_label.py:235: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
y = column_or_1d(y, warn=True)
/usr/local/lib/python3.7/dist-packages/sklearn/preprocessing/_label.py:268: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
y = column_or_1d(y, warn=True)
/usr/local/lib/python3.7/dist-packages/sklearn/model_selection/_validation.py:536: FitFailedWarning: Estimator fit failed. The score on this train-test partition for these parameters will be set to nan. Details:
ValueError: Failed to convert a NumPy array to a Tensor (Unsupported object type float).
FitFailedWarning)
Baseline: nan% (nan%)
Is there any solution to make the algorithm work? I can't predict any values.
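The DataConversionWarning itself names the fix for the label shape: pass Y as a 1-D array, for example with ravel(). The FitFailedWarning is likely a separate issue: X is sliced from the SMILES column, so it holds strings, which cannot be converted to a float tensor without a numeric encoding (e.g. tokenization) first. A minimal sketch of the ravel fix, assuming the variables from the code above:

# flatten the (n_samples, 1) column vector to (n_samples,), as the warning asks
encoder = LabelEncoder()
encoder.fit(Y.ravel())
encoded_Y = encoder.transform(Y.ravel())
dummy_y = np_utils.to_categorical(encoded_Y)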

How do I implement a multilabel classification neural network with Keras

I am attempting to implement a neural network using Keras for a problem that involves multilabel classification. I understand that one way to tackle the problem is to transform it into several binary classification problems. I have implemented one of these, but I am not sure how to proceed with the others, and in particular how to combine them. My data set has 5 input variables and 5 labels. A single sample generally has 1-2 labels; it is rare to have more than two.
Here is my code (thanks to machinelearningmastery.com):
import numpy
import pandas
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
# fix random seed for reproducibility
seed = 7
numpy.random.seed(seed)
# load dataset
dataframe = pandas.read_csv("Realdata.csv", header=None)
dataset = dataframe.values
# split into input (X) and output (Y) variables
X = dataset[:,0:5].astype(float)
Y = dataset[:,5]
# encode class values as integers
encoder = LabelEncoder()
encoder.fit(Y)
encoded_Y = encoder.transform(Y)
# baseline model
def create_baseline():
    # create model
    model = Sequential()
    model.add(Dense(5, input_dim=5, kernel_initializer='normal', activation='relu'))
    model.add(Dense(1, kernel_initializer='normal', activation='sigmoid'))
    # Compile model
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    scores = model.evaluate(X, encoded_Y)
    print("\n%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))
    # Make predictions....change the model.predict to whatever you want instead of X
    predictions = model.predict(X)
    # round predictions
    rounded = [round(x[0]) for x in predictions]
    print(rounded)
    return model
# evaluate model with standardized dataset
estimator = KerasClassifier(build_fn=create_baseline, epochs=100, batch_size=5, verbose=0)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(estimator, X, encoded_Y, cv=kfold)
print("Results: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))
The approach you are referring to is the one-versus-all strategy: training one binary classifier per label. However, when using a neural network, the easiest solution for a multi-label classification problem with 5 labels is to use a single model with 5 output nodes. With Keras:
model = Sequential()
model.add(Dense(5, input_dim=5, kernel_initializer='normal', activation='relu'))
model.add(Dense(5, kernel_initializer='normal', activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='sgd')
You can provide the training labels as binary-encoded vectors of length 5. For instance, an example that corresponds to classes 2 and 3 would have the label [0 1 1 0 0].
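A minimal end-to-end sketch of this setup (the data here is made up purely for illustration):

import numpy as np
from keras.models import Sequential
from keras.layers import Dense

# toy data: 8 samples with 5 features; each label row is a multi-hot vector
X = np.random.rand(8, 5)
Y = np.array([[0, 1, 1, 0, 0]] * 8)  # e.g. classes 2 and 3 present

model = Sequential()
model.add(Dense(5, input_dim=5, kernel_initializer='normal', activation='relu'))
model.add(Dense(5, kernel_initializer='normal', activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='sgd')
model.fit(X, Y, epochs=10, verbose=0)

# each output is an independent per-label probability; threshold at 0.5
predictions = (model.predict(X) > 0.5).astype(int)

With sigmoid outputs and binary cross-entropy, each of the 5 nodes learns its label independently, which is exactly what multi-label classification requires.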

Multi-label text classification: I have a text/label CSV. The text is plain text, the labels are alphanumeric

import keras
import keras.backend as K
import numpy as np
import pandas as pd
from keras.optimizers import Adam
from keras.models import Sequential
from keras.layers import Dense, Input, Flatten, Dropout, BatchNormalization
from keras.layers import Conv1D, MaxPooling1D, Embedding
from keras.preprocessing.text import Tokenizer          # for tokenizing text
from keras.preprocessing.sequence import pad_sequences  # for padding sentences with zeros, to make all sentence lengths equal
from keras.utils import to_categorical                  # for one-hot encoding of the labels
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
MAX_SEQUENCE_LENGTH = 300
MAX_NB_WORDS = 20000
# Reading the data
raw_data = pd.read_csv("/home/riaz.k/Desktop/TRAIN.csv")
raw_data.head()
# create training and testing vars
train, test = train_test_split(raw_data, test_size=0.3)
train.head()
test.head()
tokenizer = Tokenizer(num_words=MAX_NB_WORDS)
tokenizer.fit_on_texts(train.Procedure)
train_sequences = tokenizer.texts_to_sequences(train.Procedure)
test_sequences = tokenizer.texts_to_sequences(test.Procedure)
word_index = tokenizer.word_index  # dictionary containing words and their index
# print(tokenizer.word_index)
print('Found %s unique tokens.' % len(word_index))
train_data = pad_sequences(train_sequences, maxlen=MAX_SEQUENCE_LENGTH)
test_data = pad_sequences(test_sequences, maxlen=MAX_SEQUENCE_LENGTH)
print(train_data.shape)
print(test_data.shape)
print(word_index)
train_labels = train['dxcode']
test_labels = test['dxcode']
le = LabelEncoder()  # converts the character array to a numeric array; assigns levels to unique labels
le.fit(train_labels)
le.fit(test_labels)
train_labels = le.transform(train_labels)
test_labels = le.transform(test_labels)
print(le.classes_)
print(np.unique(train_labels, return_counts=True))
print(np.unique(test_labels, return_counts=True))
le.inverse_transform(1)
labels_train = to_categorical(np.asarray(train_labels))
labels_test = to_categorical(np.asarray(test_labels))
print('Shape of data tensor:', train_data.shape)
print('Shape of label tensor:', labels_train.shape)
print('Shape of label tensor:', labels_test.shape)
EMBEDDING_DIM = 100
print(MAX_SEQUENCE_LENGTH)
print('Training model.')
model = Sequential()
model.add(Embedding(MAX_NB_WORDS, EMBEDDING_DIM, input_length=MAX_SEQUENCE_LENGTH))
model.add(Dropout(0.2))
model.add(Conv1D(128, 5, activation='relu'))
model.add(MaxPooling1D(5))
model.add(Dropout(0.5))
model.add(BatchNormalization())
model.add(Conv1D(128, 5, activation='relu'))
model.add(MaxPooling1D(5))
model.add(Dropout(0.5))
model.add(BatchNormalization())
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dense(23, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])
model.fit(train_data, labels_train, batch_size=32, epochs=10,
          validation_data=(test_data, labels_test))
model.evaluate(test_data, labels_test)
pred = model.predict(test_data)
# print(model.layers)
for layer in model.layers:
    print(layer)
emd = K.function(inputs=[model.layers[0].input], outputs=[model.layers[0].output])
rbind = np.concatenate((train_data, test_data), axis=0)
print(rbind.shape)
### Submissions file
test_results = model.predict_classes(rbind)
# print(test_results)
test_labels = le.inverse_transform(test_results)
# test_labels = [le.inverse_transform(i) for i in test_results]
submissions_CNN = pd.DataFrame({'id': raw_data['Claimno'], "label": test_labels})
submissions_CNN.to_csv("/home/riaz.k/Desktop/submissions.csv", index=False)
Each text document can be labelled with more than one label, so how can I do multi-label classification on this dataset? I've read a lot of sklearn documentation, but I can't seem to find the right way to do it. Thanks in advance for any help.
Are you getting the error on this line:
train_labels = le.transform(train_labels)
If yes, it's because of the line just above it:
le.fit(test_labels)
This second call to fit() forgets the previous data (from the fit() call on the line above it) and only remembers the data in test_labels. So when a label that is present in train but not in test comes along, it throws this error.
You need to replace the lines:
le.fit(train_labels)
le.fit(test_labels)
with this:
# I am using .tolist() because I observe that your
# train_labels, test_labels are pandas Series objects
le.fit(train_labels.tolist() + test_labels.tolist())
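As a standalone illustration of why the double fit() fails (toy labels, not from the question's data):

from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()
le.fit(["asthma", "flu"])      # first fit: classes_ is ['asthma', 'flu']
le.fit(["flu", "migraine"])    # refit: the encoder now forgets "asthma"
le.transform(["asthma"])       # raises ValueError: previously unseen label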

How to Build an LSTM Classifier Model in Keras

I am new to Keras and RNNs.
I need to build a classifier model using an LSTM RNN in Keras for a dataset containing a train set of shape (1795575, 6) and a labels array of shape (1795575, 1). The labels are 11 classes (from 0 to 10).
The test set has shape (575643, 6) and a labels array of shape (575643, 1). Again, the labels are the same 11 classes (from 0 to 10).
How can I shape the following Keras model to fit my dataset? What values should I put in place of the question marks?
from keras.models import Sequential
from keras.layers import LSTM, Dense
from keras.optimizers import SGD
import numpy as np
data_dim = ?
timesteps = ?
num_classes = ?
batch_size = ?
sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
model = Sequential()
model.add(LSTM(32, return_sequences=True, stateful=True,
               batch_input_shape=(batch_size, timesteps, data_dim)))
model.add(LSTM(32, return_sequences=True, stateful=True))
model.add(LSTM(32, stateful=True))
model.add(Dense(?, activation='softmax'))
model.compile(loss='sparse_categorical_crossentropy', optimizer='sgd',
              metrics=['accuracy'])
model.fit(train_X_arr, train_y_arr, batch_size=batch_size, epochs=epochs,
          shuffle=False, validation_data=(test_X_arr, test_y_arr))
I appreciate your help and thanks in advance.
What you would like to do is this:
from keras.models import Sequential
from keras.layers import LSTM, Dense
from keras.optimizers import SGD
import numpy as np
data_dim = 1      # EACH TIMESTEP IS A SCALAR, SO ITS DIMENSION IS 1
timesteps = 6     # EACH EXAMPLE CONTAINS 6 TIMESTEPS
num_classes = 11  # LABELS RANGE FROM 0 TO 10, SO THERE ARE 11 CLASSES
batch_size = 1    # TAKE A SIZE THAT DIVIDES THE NUMBER OF EXAMPLES IN THE TRAIN DATA. THE HIGHER THE BATCH SIZE THE BETTER!
epochs = 10       # FOR EXAMPLE
sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
model = Sequential()
model.add(LSTM(32, return_sequences=True, stateful=True,
               batch_input_shape=(batch_size, timesteps, data_dim)))
model.add(LSTM(32, return_sequences=True, stateful=True))
model.add(LSTM(32, stateful=True))
model.add(Dense(num_classes, activation='softmax'))  # ONE OUTPUT NODE PER CLASS; sparse_categorical_crossentropy EXPECTS INTEGER LABELS
model.compile(loss='sparse_categorical_crossentropy',
              optimizer=sgd,  # PASS THE CONFIGURED SGD OBJECT, NOT THE STRING 'sgd'
              metrics=['accuracy'])
model.fit(train_X_arr, train_y_arr, batch_size=batch_size, epochs=epochs,
          shuffle=False, validation_data=(test_X_arr, test_y_arr))
and that's it.
EDIT: In addition, make sure that you reshape your train data to (1795575, 6, 1) -> 1795575 examples, each with 6 timesteps, each timestep a scalar.
You can achieve that easily by using np.expand_dims(train_data, -1).
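For instance, using a placeholder array with the question's train shape just to show the transformation:

import numpy as np

train_data = np.zeros((1795575, 6))           # placeholder with the question's shape
train_X_arr = np.expand_dims(train_data, -1)
print(train_X_arr.shape)                      # (1795575, 6, 1)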

Python Keras LSTM input output shape issue

I am running Keras over TensorFlow, trying to implement a multi-dimensional LSTM network to predict a linear continuous target variable, a single value for each example (return_sequences=False).
My sequence length is 10 and the number of features (dim) is 11.
This is what I run:
import pprint, pickle
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.layers import LSTM
# Input sequence
wholeSequence = [[0,0,0,0,0,0,0,0,0,2,1],
                 [0,0,0,0,0,0,0,0,2,1,0],
                 [0,0,0,0,0,0,0,2,1,0,0],
                 [0,0,0,0,0,0,2,1,0,0,0],
                 [0,0,0,0,0,2,1,0,0,0,0],
                 [0,0,0,0,2,1,0,0,0,0,0],
                 [0,0,0,2,1,0,0,0,0,0,0],
                 [0,0,2,1,0,0,0,0,0,0,0],
                 [0,2,1,0,0,0,0,0,0,0,0],
                 [2,1,0,0,0,0,0,0,0,0,0]]
# Preprocess Data:
wholeSequence = np.array(wholeSequence, dtype=float) # Convert to NP array.
data = wholeSequence
target = np.array([20])
# Reshape training data for Keras LSTM model
data = data.reshape(1, 10, 11)
target = target.reshape(1, 1, 1)
# Build Model
model = Sequential()
model.add(LSTM(11, input_shape=(10, 11), unroll=True, return_sequences=False))
model.add(Dense(11))
model.add(Activation('linear'))
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(data, target, nb_epoch=1, batch_size=1, verbose=2)
and get the error: ValueError: Error when checking target: expected activation_1 to have 2 dimensions, but got array with shape (1, 1, 1)
I am not sure what shape the activation layer should get.
Any help appreciated, thanks.
If you just want a single linear output neuron, you can simply use a Dense layer with one unit and supply the activation there. Your target can then stay a single vector without the reshape. I adjusted your given example code to make it work:
wholeSequence = np.array(wholeSequence, dtype=float) # Convert to NP array.
data = wholeSequence
target = np.array([20])
# Reshape training data for Keras LSTM model
data = data.reshape(1, 10, 11)
# Build Model
model = Sequential()
model.add(LSTM(11, input_shape=(10, 11), unroll=True, return_sequences=False))
model.add(Dense(1, activation='linear'))
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(data, target, epochs=1, batch_size=1, verbose=2)  # 'epochs' replaces the deprecated 'nb_epoch' argument
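After fitting, the model emits one scalar per input sequence; for example:

pred = model.predict(data)
print(pred.shape)  # (1, 1): one prediction for the single example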
