How do I use the MLP score function? Error: shapes (295,1) and (7,450) not aligned: 1 (dim 1) != 7 (dim 0) - python-3.x

I recently started coding deep neural networks in Python and have been stuck on this problem for weeks. I have checked similar questions but could not work out the solution from them.
I have a feed-forward neural network and I am trying to obtain the R^2 value for my model. I have a dataframe of 1031 rows by 9 columns. I am fitting the first 7 columns (storm characteristics) against the water elevation given in the 8th or 9th column; here 'h' is the heading of the column being predicted from the first 7 columns.
See csv data at this link.
### Load dependencies
import pandas as pd
import numpy as np
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import accuracy_score, mean_squared_error
from sklearn.model_selection import cross_val_score
from sklearn.metrics import r2_score
data = pd.read_csv(r'C:\Users\SM\Desktop\dataframe.csv')

### Define MLP Hyper-parameters
n = 20   # ----------- Number of layers
p = 450  # ----------- Number of perceptrons in each layer
i = 1    # ----------- Initializing iteration count
MLPsize = []

### While loop to create tuple for MLP hyperparameter input
while i <= n:
    MLPsize.append(p)
    i += 1

Model = MLPRegressor(hidden_layer_sizes=tuple(MLPsize),
                     activation='logistic', solver='adam', alpha=0.0001,
                     batch_size='auto', learning_rate='adaptive', learning_rate_init=0.001,
                     power_t=0.5, max_iter=5000, shuffle=False, random_state=None, tol=0.0001,
                     verbose=False, warm_start=False, momentum=0.9, nesterovs_momentum=True,
                     early_stopping=False, validation_fraction=0.5,
                     beta_1=0.9, beta_2=0.999, epsilon=1e-08, n_iter_no_change=10)
### Define function to run train-test simulation
msk = np.random.rand(len(data)) < 0.7
Train = data[msk]
Test = data[~msk]
Train = Train.reset_index(drop=True)
Test = Test.reset_index(drop=True)
def RunModel(h):
    global Train_pred
    global Test_pred
    global Train_true
    global Test_true
    global Train_error
    global Test_error
    Train_error = []
    Test_error = []
    Model.fit(Train.iloc[:, :7], Train[h])
    Train_pred = Model.predict(Train.iloc[:, :7])
    Train_error.append(mean_squared_error(Train[h], Train_pred))
    Train_true = np.array(Train[h]).reshape(-1, 1)
    Train_pred = Train_pred.reshape(-1, 1)
    Test_pred = Model.predict(Test.iloc[:, :7])
    Test_error.append(mean_squared_error(Test[h], Test_pred))
    Test_true = np.array(Test[h]).reshape(-1, 1)
    Test_pred = Test_pred.reshape(-1, 1)
    print(mean_squared_error(Train_pred, Train_true))
    print(Model.score(Test_pred, Test_true))
RunModel('11939')
And I get an error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-23-01acc869ac62> in <module>
----> 1 Model.score(Test_pred, Test_true)
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\base.py in score(self, X, y, sample_weight)
406 from .metrics import r2_score
407 from .metrics.regression import _check_reg_targets
--> 408 y_pred = self.predict(X)
409 # XXX: Remove the check in 0.23
410 y_type, _, _, _ = _check_reg_targets(y, y_pred, None)
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\utils\extmath.py in safe_sparse_dot(a, b, dense_output)
140 return ret
141 else:
--> 142 return np.dot(a, b)
143
144
ValueError: shapes (295,1) and (7,450) not aligned: 1 (dim 1) != 7 (dim 0)
Please help. Any detailed explanation would be appreciated.

The line that raises the error is the following:
print(Model.score(Test_pred, Test_true))
This is not valid, since the .score() method of MLPRegressor takes (X, y) as inputs: the feature matrix and the true targets, not the predictions. Read more here: https://scikit-learn.org/stable/modules/generated/sklearn.neural_network.MLPRegressor.html#sklearn.neural_network.MLPRegressor.score
So, to estimate the R2 coefficient of the model you need to pass X_test and y_test, which in your case are Test.iloc[:, :7] and Test_true.
### Load dependencies
import pandas as pd
import numpy as np
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import accuracy_score, mean_squared_error
from sklearn.model_selection import cross_val_score
from sklearn.metrics import r2_score
data = pd.read_csv(r'C:\Users\SM\Desktop\dataframe.csv')

### Define MLP Hyper-parameters
n = 20   # ----------- Number of layers
p = 450  # ----------- Number of perceptrons in each layer
i = 1    # ----------- Initializing iteration count
MLPsize = []

### While loop to create tuple for MLP hyperparameter input
while i <= n:
    MLPsize.append(p)
    i += 1

Model = MLPRegressor(hidden_layer_sizes=tuple(MLPsize),
                     activation='logistic', solver='adam', alpha=0.0001,
                     batch_size='auto', learning_rate='adaptive', learning_rate_init=0.001,
                     power_t=0.5, max_iter=5000, shuffle=False, random_state=None, tol=0.0001,
                     verbose=False, warm_start=False, momentum=0.9, nesterovs_momentum=True,
                     early_stopping=False, validation_fraction=0.5,
                     beta_1=0.9, beta_2=0.999, epsilon=1e-08, n_iter_no_change=10)
### Define function to run train-test simulation
msk = np.random.rand(len(data)) < 0.7
Train = data[msk]
Test = data[~msk]
Train = Train.reset_index(drop=True)
Test = Test.reset_index(drop=True)
def RunModel(h):
    global Train_pred
    global Test_pred
    global Train_true
    global Test_true
    global Train_error
    global Test_error
    Train_error = []
    Test_error = []
    Model.fit(Train.iloc[:, :7], Train[h])
    Train_pred = Model.predict(Train.iloc[:, :7])
    Train_error.append(mean_squared_error(Train[h], Train_pred))
    Train_true = np.array(Train[h]).reshape(-1, 1)
    Train_pred = Train_pred.reshape(-1, 1)
    Test_pred = Model.predict(Test.iloc[:, :7])
    Test_error.append(mean_squared_error(Test[h], Test_pred))
    Test_true = np.array(Test[h]).reshape(-1, 1)
    Test_pred = Test_pred.reshape(-1, 1)
    print(mean_squared_error(Train_pred, Train_true))
    print(Model.score(Test.iloc[:, :7], Test_true))
RunModel('11939')
Prints:
0.5131277608869395
-0.02165748764016695
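As a cross-check, MLPRegressor.score is simply the R² of the model's predictions against the true targets, so the r2_score helper that is already imported should print the same value. A minimal sketch, using the Test_true and Test_pred arrays that RunModel leaves in the global scope:
# R^2 computed directly from the stored predictions; matches Model.score(Test.iloc[:, :7], Test_true)
print(r2_score(Test_true, Test_pred))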

Related

Multi-Class Text Classification with BERT null prediction error

I am new to multi-class text classification with BERT. I have been following a tutorial (https://towardsdatascience.com/multi-label-multi-class-text-classification-with-bert-transformer-and-keras-c6355eccb63a) for learning purposes.
I am able to get the script below running up to the point of calculating the confusion matrix. The classification report does not work either. I would be grateful if someone could help. My apologies if this question has already been asked; I searched everywhere and could not find an answer.
The error occurs here: y_predicted = numpy.argmax(predicted_raw, axis = 1). The error message says "axis 1 is out of bounds for array of dimension 1". When I change the axis to zero, the new error message is "Singleton array 0 cannot be considered a valid collection." I think the axis=0 error means that y_predicted is null; I double-checked that with an if statement.
import pandas
import numpy
import re
import nltk
# for plotting
import matplotlib.pyplot as plt
import seaborn as sns
input_dataframe = pandas.read_csv('tutorial6.csv')
fig, ax = plt.subplots()
fig.suptitle("Product", fontsize=12)
input_dataframe["Product"].reset_index().groupby("Product").count().sort_values(by=
"index").plot(kind="barh", legend=False,
ax=ax).grid(axis='x')
plt.show()
def utils_preprocess_text(text, flg_stemm=False, flg_lemm=True, lst_stopwords=None):
    ## clean (convert to lowercase and remove punctuations and characters and then strip)
    text = re.sub(r'[^\w\s]', '', str(text).lower().strip())
    ## Tokenize (convert from string to list)
    lst_text = text.split()
    ## remove Stopwords
    if lst_stopwords is not None:
        lst_text = [word for word in lst_text if word not in lst_stopwords]
    ## Stemming (remove -ing, -ly, ...)
    if flg_stemm == True:
        ps = nltk.stem.porter.PorterStemmer()
        lst_text = [ps.stem(word) for word in lst_text]
    ## Lemmatisation (convert the word into root word)
    if flg_lemm == True:
        lem = nltk.stem.wordnet.WordNetLemmatizer()
        lst_text = [lem.lemmatize(word) for word in lst_text]
    ## back to string from list
    text = " ".join(lst_text)
    return text
lst_stopwords = nltk.corpus.stopwords.words("english")
input_dataframe["text_clean"] = input_dataframe ["Consumer_Complaint"].apply(lambda x:
utils_preprocess_text(x, flg_stemm=False, flg_lemm=True,
lst_stopwords=lst_stopwords))
from tensorflow.keras.utils import to_categorical
possible_labels = input_dataframe.Product.unique()
label_dict = {}
for index, possible_label in enumerate(possible_labels):
    label_dict[possible_label] = index
print(label_dict)
input_dataframe['label'] = input_dataframe.Product.replace(label_dict)
# Split into train and test - stratify over Issue
from sklearn.model_selection import train_test_split
data_train, data_test = train_test_split(input_dataframe, test_size = 0.2,stratify = input_dataframe[["label"]])
# Load Huggingface transformers
from transformers import TFBertModel, BertConfig, BertTokenizerFast
# Then what you need from tensorflow.keras
from tensorflow.keras.layers import Input, Dropout, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.initializers import TruncatedNormal
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.metrics import CategoricalAccuracy
from tensorflow.keras.utils import to_categorical
### --------- Setup BERT ---------- ###
# Name of the BERT model to use
model_name = 'bert-base-uncased'
# Max length of tokens
max_length = 100
# Load transformers config and set output_hidden_states to False
config = BertConfig.from_pretrained(model_name)
config.output_hidden_states = False
# Load BERT tokenizer
tokenizer = BertTokenizerFast.from_pretrained(pretrained_model_name_or_path = model_name, config = config)
# Load the Transformers BERT model
transformer_model = TFBertModel.from_pretrained(model_name, config = config)
### ------- Build the model ------- ###
# TF Keras documentation: https://www.tensorflow.org/api_docs/python/tf/keras/Model
# Load the MainLayer
bert = transformer_model.layers[0]
# Build your model input
input_ids = Input(shape=(max_length,), name='input_ids', dtype='int32')
inputs = {'input_ids': input_ids}
# Load the Transformers BERT model as a layer in a Keras model
bert_model = bert(inputs)[1]
dropout = Dropout(config.hidden_dropout_prob, name='pooled_output')
pooled_output = dropout(bert_model, training=False)
# Then build your model output
product = Dense(8, kernel_initializer=TruncatedNormal(stddev=config.initializer_range), name='product')(pooled_output)
outputs = {'product': product}
# And combine it all in a model object
model = Model(inputs=inputs, outputs=outputs, name='BERT_MultiLabel_MultiClass')
# Take a look at the model
model.summary()
# Set an optimizer
optimizer = Adam()
# Set loss and metrics
loss = {'product': CategoricalCrossentropy(from_logits = True)}
metric = {'product': CategoricalAccuracy('accuracy')}
# Compile the model
model.compile(
    optimizer=optimizer,
    loss=loss,
    metrics=metric)
# Ready output data for the model
y_train = to_categorical(data_train['label'],8)
y_test = to_categorical(data_test['label'],8)
x_train = tokenizer(
    text=data_train['Consumer_Complaint'].to_list(),
    add_special_tokens=True,
    max_length=max_length,
    truncation=True,
    padding=True,
    return_tensors='tf',
    return_token_type_ids=False,
    return_attention_mask=False,
    verbose=True)
x_test = tokenizer(
    text=data_test['Consumer_Complaint'].to_list(),
    add_special_tokens=True,
    max_length=max_length,
    truncation=True,
    padding=True,
    return_tensors='tf',
    return_token_type_ids=False,
    return_attention_mask=False,
    verbose=True)
# Fit the model
history = model.fit(
    x={'input_ids': x_train['input_ids']},
    y={'product': y_train},
    validation_split=0.2,
    batch_size=64,
    epochs=1)
### ----- Evaluate the model ------ ###
model_eval = model.evaluate(
    x={'input_ids': x_test['input_ids']},
    y={'product': y_test}
)
print("This is evaluation: ", model_eval)
accr = model.evaluate(x_test['input_ids'],y_test)
print('Test set\n Loss: {:0.3f}\n Accuracy: {:0.3f}'.format(accr[0],accr[1]))
from matplotlib import pyplot as plt
plt.title('Loss')
plt.plot(history.history['loss'], label='train')
plt.plot(history.history['val_loss'], label='test')
plt.legend()
plt.show();
# plot loss and accuracy
metrics = [k for k in history.history.keys() if ("loss" not in k) and ("val" not in k)]
fig, ax = plt.subplots(nrows=1, ncols=2, sharey=True)
ax[0].set(title="Training")
ax11 = ax[0].twinx()
ax[0].plot(history.history['loss'], color='black')
ax[0].set_xlabel('Epochs')
ax[0].set_ylabel('Loss', color='black')
for metric in metrics:
    ax11.plot(history.history[metric], label=metric)
ax11.set_ylabel("Score", color='steelblue')
ax11.legend()
ax[1].set(title="Validation")
ax22 = ax[1].twinx()
ax[1].plot(history.history['val_loss'], color='black')
ax[1].set_xlabel('Epochs')
ax[1].set_ylabel('Loss', color='black')
for metric in metrics:
    ax22.plot(history.history['val_'+metric], label=metric)
ax22.set_ylabel("Score", color="steelblue")
plt.show()
#Testing our model on the test data.
predicted_raw = model.predict({'input_ids':x_test['input_ids']})
print(type(predicted_raw))
predicted_raw=list(predicted_raw)
predicted_raw=numpy.array(predicted_raw)
y_predicted = numpy.argmax(predicted_raw, axis = 1)
y_true = data_test.label
from sklearn.metrics import accuracy_score,confusion_matrix,classification_report
confusionmatrix = confusion_matrix(y_predicted,y_true)
I am trying to get the confusion matrix and classification report working.
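One observation that may explain the argmax error: the Keras model above is built with a dictionary output ({'product': product}), so model.predict may return a dict keyed by the output name rather than a plain array; wrapping that dict in list()/numpy.array() collapses it to a 1-D array of keys, which would produce exactly "axis 1 is out of bounds for array of dimension 1". A minimal, hedged sketch of extracting the logits instead (assuming the 'product' output defined above):
# Hypothetical fix sketch: pull the logits out of whatever predict() returns
predicted_raw = model.predict({'input_ids': x_test['input_ids']})
logits = predicted_raw['product'] if isinstance(predicted_raw, dict) else predicted_raw
y_predicted = numpy.argmax(logits, axis=1)   # shape (n_samples,)
y_true = data_test.label
from sklearn.metrics import confusion_matrix, classification_report
print(confusion_matrix(y_true, y_predicted))
print(classification_report(y_true, y_predicted))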

KNN Python implementation

This is what shows up when I try running my code:
FutureWarning: Unlike other reduction functions (e.g. skew, kurtosis), the default behavior of mode typically preserves the axis it acts along. In SciPy 1.11.0, this behavior will change: the default value of keepdims will become False, the axis over which the statistic is taken will be eliminated, and the value None will no longer be accepted. Set keepdims to True or False to avoid this warning.
lab = mode(labels)
This is my Python code, and I am having difficulty finding a suitable solution:
# Importing the required modules
import numpy as np
from scipy.stats import mode

# Euclidean Distance
def eucledian(p1, p2):
    dist = np.sqrt(np.sum((p1 - p2) ** 2))
    return dist

# Function to calculate KNN
def predict(x_train, y, x_input, k):
    op_labels = []
    # Loop through the Datapoints to be classified
    for item in x_input:
        # Array to store distances
        point_dist = []
        # Loop through each training Data
        for j in range(len(x_train)):
            distances = eucledian(np.array(x_train[j, :]), item)
            # Calculating the distance
            point_dist.append(distances)
        point_dist = np.array(point_dist)
        # Sorting the array while preserving the index
        # Keeping the first K datapoints
        dist = np.argsort(point_dist)[:k]
        # Labels of the K datapoints from above
        labels = y[dist]
        # Majority voting
        lab = mode(labels)
        lab = lab.mode[0]
        op_labels.append(lab)
    return op_labels
# Importing the required modules
# Importing required modules
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_iris
from numpy.random import randint
# Loading the Data
iris= load_iris()
# Store features matrix in X
X= iris.data
# Store target vector in y
y = iris.target
# Creating the training Data
train_idx = xxx = randint(0, 150, 100)
X_train = X[train_idx]
y_train = y[train_idx]
# Creating the testing Data
test_idx = xxx = randint(0, 150, 50) # taking 50 random samples
X_test = X[test_idx]
y_test = y[test_idx]
# Applying our function
y_pred = predict(X_train, y_train, X_test, 7)
# Checking the accuracy
accuracy_score(y_test, y_pred)
I am expecting a prediction/accuracy value as the output.
KNN can be done like this.
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"
# Assign colum names to the dataset
names = ['sepal-length', 'sepal-width', 'petal-length', 'petal-width', 'Class']
# Read dataset to pandas dataframe
dataset = pd.read_csv(url, names=names)
dataset.head()
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, 4].values
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20)
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)
from sklearn.neighbors import KNeighborsClassifier
classifier = KNeighborsClassifier(n_neighbors=5, metric='minkowski')
classifier.fit(X_train, y_train)
y_pred = classifier.predict(X_test)
from sklearn.metrics import classification_report, confusion_matrix
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))
# Result:
#                  precision    recall  f1-score   support
#
#     Iris-setosa       1.00      1.00      1.00        13
# Iris-versicolor       1.00      0.89      0.94         9
#  Iris-virginica       0.89      1.00      0.94         8
#
#        accuracy                           0.97        30
#       macro avg       0.96      0.96      0.96        30
#    weighted avg       0.97      0.97      0.97        30
error = []
# Calculating error for K values between 1 and 40
for i in range(1, 40):
    knn = KNeighborsClassifier(n_neighbors=i)
    knn.fit(X_train, y_train)
    pred_i = knn.predict(X_test)
    error.append(np.mean(pred_i != y_test))

plt.figure(figsize=(12, 6))
plt.plot(range(1, 40), error, color='red', linestyle='dashed', marker='o',
         markerfacecolor='blue', markersize=10)
plt.title('Error Rate K Value')
plt.xlabel('K Value')
plt.ylabel('Mean Error')
plt.show()
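As for the FutureWarning raised by the original predict() function: newer SciPy versions (1.9 and later) accept a keepdims argument to scipy.stats.mode, so one way to silence the warning is a sketch like this, assuming the same labels array as in the question:
# Majority voting without the keepdims FutureWarning (SciPy >= 1.9)
lab = mode(labels, keepdims=True).mode[0]

# or, avoiding scipy.stats.mode entirely:
# values, counts = np.unique(labels, return_counts=True)
# lab = values[np.argmax(counts)]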

How to integrate LIME with PyTorch?

Using this MNIST image classification model:
%reset -f
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import torch.utils.data as data_utils
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_moons
from matplotlib import pyplot
from pandas import DataFrame
import torchvision.datasets as dset
import os
import torch.nn.functional as F
import time
import random
import pickle
from sklearn.metrics import confusion_matrix
import pandas as pd
import sklearn
trans = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (1.0,))])
root = './data'
if not os.path.exists(root):
    os.mkdir(root)

train_set = dset.MNIST(root=root, train=True, transform=trans, download=True)
test_set = dset.MNIST(root=root, train=False, transform=trans, download=True)
batch_size = 64

train_loader = torch.utils.data.DataLoader(
    dataset=train_set,
    batch_size=batch_size,
    shuffle=True)
test_loader = torch.utils.data.DataLoader(
    dataset=test_set,
    batch_size=batch_size,
    shuffle=True)
class NeuralNet(nn.Module):
    def __init__(self):
        super(NeuralNet, self).__init__()
        self.fc1 = nn.Linear(28*28, 500)
        self.fc2 = nn.Linear(500, 256)
        self.fc3 = nn.Linear(256, 2)

    def forward(self, x):
        x = x.view(-1, 28*28)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
num_epochs = 2
random_sample_size = 200
values_0_or_1 = [t for t in train_set if (int(t[1]) == 0 or int(t[1]) == 1)]
values_0_or_1_testset = [t for t in test_set if (int(t[1]) == 0 or int(t[1]) == 1)]
print(len(values_0_or_1))
print(len(values_0_or_1_testset))
train_loader_subset = torch.utils.data.DataLoader(
    dataset=values_0_or_1,
    batch_size=batch_size,
    shuffle=True)
test_loader_subset = torch.utils.data.DataLoader(
    dataset=values_0_or_1_testset,
    batch_size=batch_size,
    shuffle=False)
train_loader = train_loader_subset
# Hyper-parameters
input_size = 100
hidden_size = 100
num_classes = 2
# learning_rate = 0.00001
learning_rate = .0001
# Device configuration
device = 'cpu'
print_progress_every_n_epochs = 1
model = NeuralNet().to(device)
# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
N = len(train_loader)
# Train the model
total_step = len(train_loader)
most_recent_prediction = []
test_actual_predicted_dict = {}
rm = random.sample(list(values_0_or_1), random_sample_size)
train_loader_subset = data_utils.DataLoader(rm, batch_size=4)
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader_subset):
        # Move tensors to the configured device
        images = images.reshape(-1, 2).to(device)
        labels = labels.to(device)
        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    if (epoch) % print_progress_every_n_epochs == 0:
        print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, i+1, total_step, loss.item()))

predicted_test = []
model.eval()  # eval mode (batchnorm uses moving mean/variance instead of mini-batch mean/variance)
probs_l = []
predicted_values = []
actual_values = []
labels_l = []

with torch.no_grad():
    for images, labels in test_loader_subset:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        predicted_test.append(predicted.cpu().numpy())
        sm = torch.nn.Softmax()
        probabilities = sm(outputs)
        probs_l.append(probabilities)
        labels_l.append(labels.cpu().numpy())
    predicted_values.append(np.concatenate(predicted_test).ravel())
    actual_values.append(np.concatenate(labels_l).ravel())

if (epoch) % 1 == 0:
    print('test accuracy : ', 100 * len((np.where(np.array(predicted_values[0]) == (np.array(actual_values[0])))[0])) / len(actual_values[0]))
I'm attempting to integrate 'Local Interpretable Model-Agnostic Explanations for machine learning classifiers': https://marcotcr.github.io/lime/
It appears PyTorch support is not built in, as it is not mentioned in the docs or in the following tutorial:
https://marcotcr.github.io/lime/tutorials/Tutorial%20-%20images.html
With my updated code for PyTorch:
from lime import lime_image
import time
explainer = lime_image.LimeImageExplainer()
explanation = explainer.explain_instance(images[0].reshape(28,28), model(images[0]), top_labels=5, hide_color=0, num_samples=1000)
This causes the error:
/opt/conda/lib/python3.6/site-packages/skimage/color/colorconv.py in gray2rgb(image, alpha)
830 is_rgb = False
831 is_alpha = False
--> 832 dims = np.squeeze(image).ndim
833
834 if dims == 3:
AttributeError: 'Tensor' object has no attribute 'ndim'
So it appears a TensorFlow object is expected here?
How do I integrate LIME with PyTorch image classification?
Here's my solution:
LIME expects the image input to be a NumPy array. This is why you get the attribute error, and the solution is to convert the image from a Tensor to NumPy before passing it to the explainer object. One way is to select a specific image from test_loader_subset and convert it with img = img.numpy().
Secondly, to make LIME work with PyTorch (or any other framework), you need to supply a batch prediction function that outputs the prediction scores of each class for each image. This function (here I've called it batch_predict) is then passed to explainer.explain_instance(img, batch_predict, ...). batch_predict needs to loop through all images passed to it, convert them to Tensors, make a prediction, and finally return the list of prediction scores (as NumPy values). This is how I got it working.
Note also that the images need to have shape (..., ..., 3) or (..., ..., 1) in order to be segmented properly by the default segmentation algorithm. This means you might have to use np.transpose(img, (...)). You can also specify a custom segmentation algorithm if the results are poor.
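As an illustration only (the answer does not include the batch_predict code), a minimal sketch along those lines might look like this, assuming the NeuralNet model and test_loader_subset from the question; the exact reshaping depends on how the grayscale MNIST image is converted to the (H, W, 3) format LIME expects:
import numpy as np
import torch
import torch.nn.functional as F
from lime import lime_image

def batch_predict(images):
    # LIME passes a batch of numpy images of shape (N, H, W, 3);
    # take one channel and flatten it for the fully connected network.
    model.eval()
    batch = torch.tensor(images[..., 0], dtype=torch.float32)   # (N, 28, 28)
    batch = batch.view(-1, 28 * 28)
    with torch.no_grad():
        logits = model(batch)
        probs = F.softmax(logits, dim=1)
    return probs.numpy()                                         # (N, n_classes) prediction scores

# Take one test image and convert it to a (28, 28, 3) numpy array for LIME
images_batch, labels_batch = next(iter(test_loader_subset))
img = images_batch[0].squeeze().numpy()                          # (28, 28) grayscale
img_rgb = np.stack([img] * 3, axis=-1)                           # (28, 28, 3)

explainer = lime_image.LimeImageExplainer()
explanation = explainer.explain_instance(img_rgb.astype('double'), batch_predict,
                                         top_labels=2, hide_color=0, num_samples=1000)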
Finally you'll need to display the LIME image mask on top of the original image. This snippet shows how this may be done:
from skimage.segmentation import mark_boundaries
temp, mask = explanation.get_image_and_mask(explanation.top_labels[0], positive_only=False, num_features=5, hide_rest=False)
img_boundry = mark_boundaries(temp, mask)
plt.imshow(img_boundry)
plt.show()
This notebook is a good reference:
https://github.com/marcotcr/lime/blob/master/doc/notebooks/Tutorial%20-%20images%20-%20Pytorch.ipynb

Input size (depth of inputs) must be accessible via shape inference, but saw value None error when trying to set tf.expand_dims axis to 0

I am trying to use the 20 Newsgroups dataset available in sklearn to train an LSTM to do incremental learning (classification). I used sklearn's TfidfVectorizer to pre-process the data and turned the resulting sparse matrix into a numpy array before feeding it in. After that, when running the line below:
outputs, final_state = tf.nn.dynamic_rnn(cell, inputs_, initial_state=initial_state)
it gave an error saying that inputs_ should have 3 dimensions, so I used:
inputs_ = tf.expand_dims(inputs_, 0)
to expand the dimensions. But when I do that I get the error:
ValueError: Input size (depth of inputs) must be accessible via shape
inference, but saw value None.
The shape of inputs_ is:
(1, 134410)
I already went through this post, but it did not help.
I cannot seem to understand how to solve this issue. Any help is much appreciated. Thank you in advance!
Shown below is my complete code:
import os
from collections import Counter
import tensorflow as tf
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.datasets import fetch_20newsgroups
import matplotlib as mplt
from matplotlib import cm
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
from sklearn.metrics import f1_score, recall_score, precision_score
from string import punctuation
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
def pre_process():
    newsgroups_data = fetch_20newsgroups(subset='all', remove=('headers', 'footers', 'quotes'))
    vectorizer = TfidfVectorizer()
    features = vectorizer.fit_transform(newsgroups_data.data)
    lb = LabelBinarizer()
    labels = np.reshape(newsgroups_data.target, [-1])
    labels = lb.fit_transform(labels)
    return features, labels
def get_batches(x, y, batch_size=1):
    for ii in range(0, len(y), batch_size):
        yield x[ii:ii + batch_size], y[ii:ii + batch_size]
def plot_error(errorplot, datapoint, numberOfWrongPreds):
    errorplot.set_xdata(np.append(errorplot.get_xdata(), datapoint))
    errorplot.set_ydata(np.append(errorplot.get_ydata(), numberOfWrongPreds))
    errorplot.autoscale(enable=True, axis='both', tight=None)
    plt.draw()
def train_test():
    features, labels = pre_process()

    # Defining Hyperparameters
    epochs = 1
    lstm_layers = 1
    batch_size = 1
    lstm_size = 30
    learning_rate = 0.003

    print(lstm_size)
    print(batch_size)
    print(epochs)

    # --------------placeholders-------------------------------------
    # Create the graph object
    graph = tf.Graph()
    # Add nodes to the graph
    with graph.as_default():
        tf.set_random_seed(1)
        inputs_ = tf.placeholder(tf.float32, [None, None], name="inputs")
        # labels_ = tf.placeholder(dtype= tf.int32)
        labels_ = tf.placeholder(tf.int32, [None, None], name="labels")
        # getting dynamic batch size according to the input tensor size
        # dynamic_batch_size = tf.shape(inputs_)[0]
        # output_keep_prob is the dropout added to the RNN's outputs; the dropout has no effect on the calculation of the subsequent states.
        keep_prob = tf.placeholder(tf.float32, name="keep_prob")
        # Your basic LSTM cell
        lstm = tf.contrib.rnn.BasicLSTMCell(lstm_size)
        # Add dropout to the cell
        drop = tf.contrib.rnn.DropoutWrapper(lstm, output_keep_prob=keep_prob)
        # Stack up multiple LSTM layers, for deep learning
        cell = tf.contrib.rnn.MultiRNNCell([drop] * lstm_layers)
        # Getting an initial state of all zeros
        initial_state = cell.zero_state(batch_size, tf.float32)
        inputs_ = tf.expand_dims(inputs_, 0)
        outputs, final_state = tf.nn.dynamic_rnn(cell, inputs_, initial_state=initial_state)
        # hidden layer
        hidden = tf.layers.dense(outputs[:, -1], units=25, activation=tf.nn.relu)
        logit = tf.contrib.layers.fully_connected(hidden, 1, activation_fn=None)
        cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logit, labels=labels_))
        optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)
        saver = tf.train.Saver()

    # ----------------------------online training-----------------------------------------
    with tf.Session(graph=graph) as sess:
        tf.set_random_seed(1)
        sess.run(tf.global_variables_initializer())
        iteration = 1
        state = sess.run(initial_state)
        wrongPred = 0
        errorplot, = plt.plot([], [])

        for ii, (x, y) in enumerate(get_batches(features, labels, batch_size), 1):
            feed = {inputs_: x.toarray(),
                    labels_: y,
                    keep_prob: 0.5,
                    initial_state: state}

            predictions = tf.round(tf.nn.softmax(logit)).eval(feed_dict=feed)

            print("----------------------------------------------------------")
            print("Iteration: {}".format(iteration))
            print("Prediction: ", predictions)
            print("Actual: ", y)

            pred = np.array(predictions)
            print(pred)
            print(y)

            if not ((pred == y).all()):
                wrongPred += 1

            if ii % 27 == 0:
                plot_error(errorplot, ii, wrongPred)

            loss, states, _ = sess.run([cost, final_state, optimizer], feed_dict=feed)

            print("Train loss: {:.3f}".format(loss))
            iteration += 1

        saver.save(sess, "checkpoints/sentiment.ckpt")
        errorRate = wrongPred / len(labels)
        print("ERROR RATE: ", errorRate)


if __name__ == '__main__':
    train_test()
ValueError: Input size (depth of inputs) must be accessible via shape inference, but saw value None.
This error is raised because the input depth (the size of the last dimension, which dynamic_rnn feeds to the cell at each timestep) is not known at graph-construction time: the placeholder is [None, None], and expanding at axis 0 leaves that last dimension as None.
I got the script working like this:
inputs_ = tf.placeholder(tf.float32, [1,None], name = "inputs")
inputs_withextradim = tf.expand_dims(inputs_, 2)
outputs, final_state = tf.nn.dynamic_rnn(cell, inputs_withextradim, initial_state=initial_state)
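For reference, a minimal sketch (assuming TensorFlow 1.x, as in the question) of why this version passes shape inference: expanding at axis 2 makes the variable-length TF-IDF vector the time dimension and gives each timestep a fixed depth of 1.
import tensorflow as tf  # TensorFlow 1.x

inputs_ = tf.placeholder(tf.float32, [1, None], name="inputs")  # (batch=1, n_features)
expanded = tf.expand_dims(inputs_, 2)                           # (1, None, 1)
print(expanded.shape)  # (1, ?, 1) -- the last dimension (input depth) is now statically 1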

Value error while one-hot encoding a variable in a Jupyter notebook for a neural network

In the code below I am attempting to create a neural network that tries to replicate classical music. I am trying to one-hot encode the output variable with this line of code:
y = np_utils.to_categorical(dataY)
but it gives me this error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-21-0b5dca9da053> in <module>()
65 X = X / float(n_vocab)
66 # one hot encode the output variable
---> 67 y = np_utils.to_categorical(dataY)
68 # define the LSTM model
69 model = Sequential()
~\Anaconda3\lib\site-packages\keras\utils\np_utils.py in to_categorical(y,
num_classes)
26 y = y.ravel()
27 if not num_classes:
---> 28 num_classes = np.max(y) + 1
29 n = y.shape[0]
30 categorical = np.zeros((n, num_classes))
~\Anaconda3\lib\site-packages\numpy\core\fromnumeric.py in amax(a, axis,
out, keepdims)
2318
2319 return _methods._amax(a, axis=axis,
-> 2320 out=out, **kwargs)
2321
2322
~\Anaconda3\lib\site-packages\numpy\core\_methods.py in _amax(a, axis, out,
keepdims)
24 # small reductions
25 def _amax(a, axis=None, out=None, keepdims=False):
---> 26 return umr_maximum(a, axis, None, out, keepdims)
27
28 def _amin(a, axis=None, out=None, keepdims=False):
ValueError: zero-size array to reduction operation maximum which has no
identity
Below is the complete code:
import glob
import pickle
import numpy
from music21 import converter, instrument, note, chord
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import BatchNormalization
from keras.layers import Dropout
from keras.layers import LSTM
from keras.layers import Activation
from keras.utils import np_utils
from keras.callbacks import ModelCheckpoint
def get_notes():
    """ Get all the notes and chords from the midi files in the ./midi_songs directory """
    notes = []
    for file in glob.glob("smaller classical music repertoir/*.mid"):
        midi = converter.parse(file)
        notes_to_parse = None
        parts = instrument.partitionByInstrument(midi)
        if parts:  # file has instrument parts
            notes_to_parse = parts.parts[0].recurse()
        else:  # file has notes in a flat structure
            notes_to_parse = midi.flat.notes
        for element in notes_to_parse:
            if isinstance(element, note.Note):
                notes.append(str(element.pitch))
            elif isinstance(element, chord.Chord):
                notes.append('.'.join(str(n) for n in element.normalOrder))
    with open('C:/Users/emili/Documents/python projects/music data/notes1', 'wb') as filepath:
        pickle.dump(notes, filepath)
    return notes
notes = get_notes()
# create mapping of unique chars to integers
chars = sorted(list(set(notes)))
char_to_int = dict((c, i) for i, c in enumerate(chars))
# summarize the loaded data
n_chars = len(notes)
n_vocab = len(chars)
print ("Total Characters: ", n_chars)
print ("Total Vocab: ", n_vocab)
# prepare the dataset of input to output pairs encoded as integers
seq_length = 100
dataX = []
dataY = []
for i in range(0, n_chars - seq_length, 1):
    seq_in = notes[i:i + seq_length]
    seq_out = notes[i + seq_length]
    dataX.append([char_to_int[char] for char in seq_in])
    dataY.append(char_to_int[seq_out])
n_patterns = len(dataX)
print ("Total Patterns: ", n_patterns)
# reshape X to be [samples, time steps, features]
X = numpy.reshape(dataX, (n_patterns, seq_length, 1))
# normalize
X = X / float(n_vocab)
# one hot encode the output variable
y = np_utils.to_categorical(dataY)
# define the LSTM model
model = Sequential()
model.add(LSTM(50, input_shape=(X.shape[1], X.shape[2]),
               return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(46))
model.add(Dropout(0.2))
model.add(Dense(y.shape[1], activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam')
# define the checkpoint
filepath="weights-improvement-{epoch:02d}-{loss:.4f}-bigger.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='loss', verbose=1,
save_best_only=True, mode='min')
callbacks_list = [checkpoint]
# fit the model
model.fit(X, y, epochs=1000, batch_size=64, callbacks=callbacks_list)
Edit: For some reason, the code works when I run it in Sublime Text but not in the Jupyter notebook.
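One hedged observation on that edit: to_categorical raises "zero-size array to reduction operation maximum" when dataY is empty, which happens whenever get_notes() returns no notes, for example if the relative glob pattern matches no MIDI files because the Jupyter kernel's working directory differs from the one Sublime Text uses. A small sanity check along these lines (using the dataY built above) could confirm whether that is the cause:
# Hypothetical sanity check before the call to np_utils.to_categorical(dataY)
import os, glob
print("Working directory:", os.getcwd())
files = glob.glob("smaller classical music repertoir/*.mid")
print("MIDI files found:", len(files))
assert len(files) > 0, "Glob matched no .mid files -- check the notebook's working directory"
assert len(dataY) > 0, "No training patterns were generated, so to_categorical receives an empty array"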
