Multi-Class Text Classification with BERT: null prediction error

I am new to multi-class text classification with BERT. I have been following a tutorial (https://towardsdatascience.com/multi-label-multi-class-text-classification-with-bert-transformer-and-keras-c6355eccb63a) for learning purposes.
I am able to run the script below up to the point of calculating the confusion matrix; the classification report does not work either. I would be grateful if someone could help me. My apologies if this question has already been asked; I searched everywhere and could not find an answer.
The error occurs here: y_predicted = numpy.argmax(predicted_raw, axis = 1). The error message says "axis 1 is out of bounds for array of dimension 1". When I change the axis to zero, the new error message is "Singleton array 0 cannot be considered a valid collection." I think the axis=0 error means that y_predicted is null; I double-checked this with an if statement.
import pandas
import numpy
import re
import nltk
# for plotting
import matplotlib.pyplot as plt
import seaborn as sns
input_dataframe = pandas.read_csv('tutorial6.csv')
fig, ax = plt.subplots()
fig.suptitle("Product", fontsize=12)
input_dataframe["Product"].reset_index().groupby("Product").count().sort_values(by=
"index").plot(kind="barh", legend=False,
ax=ax).grid(axis='x')
plt.show()
def utils_preprocess_text(text, flg_stemm=False, flg_lemm=True, lst_stopwords=None):
    ## clean (convert to lowercase, remove punctuation and special characters, then strip)
    text = re.sub(r'[^\w\s]', '', str(text).lower().strip())
    ## tokenize (convert from string to list)
    lst_text = text.split()
    ## remove stopwords
    if lst_stopwords is not None:
        lst_text = [word for word in lst_text if word not in lst_stopwords]
    ## stemming (remove -ing, -ly, ...)
    if flg_stemm == True:
        ps = nltk.stem.porter.PorterStemmer()
        lst_text = [ps.stem(word) for word in lst_text]
    ## lemmatisation (convert each word into its root form)
    if flg_lemm == True:
        lem = nltk.stem.wordnet.WordNetLemmatizer()
        lst_text = [lem.lemmatize(word) for word in lst_text]
    ## back to string from list
    text = " ".join(lst_text)
    return text
lst_stopwords = nltk.corpus.stopwords.words("english")
input_dataframe["text_clean"] = input_dataframe ["Consumer_Complaint"].apply(lambda x:
utils_preprocess_text(x, flg_stemm=False, flg_lemm=True,
lst_stopwords=lst_stopwords))
from tensorflow.keras.utils import to_categorical
possible_labels = input_dataframe.Product.unique()
label_dict = {}
for index, possible_label in enumerate(possible_labels):
    label_dict[possible_label] = index
print(label_dict)
input_dataframe['label'] = input_dataframe.Product.replace(label_dict)
# Split into train and test - stratify over the label
from sklearn.model_selection import train_test_split
data_train, data_test = train_test_split(input_dataframe, test_size=0.2, stratify=input_dataframe[["label"]])
# Load Huggingface transformers
from transformers import TFBertModel, BertConfig, BertTokenizerFast
# Then what you need from tensorflow.keras
from tensorflow.keras.layers import Input, Dropout, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.initializers import TruncatedNormal
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.metrics import CategoricalAccuracy
from tensorflow.keras.utils import to_categorical
### --------- Setup BERT ---------- ###
# Name of the BERT model to use
model_name = 'bert-base-uncased'
# Max length of tokens
max_length = 100
# Load transformers config and set output_hidden_states to False
config = BertConfig.from_pretrained(model_name)
config.output_hidden_states = False
# Load BERT tokenizer
tokenizer = BertTokenizerFast.from_pretrained(pretrained_model_name_or_path = model_name, config = config)
# Load the Transformers BERT model
transformer_model = TFBertModel.from_pretrained(model_name, config = config)
### ------- Build the model ------- ###
# TF Keras documentation: https://www.tensorflow.org/api_docs/python/tf/keras/Model
# Load the MainLayer
bert = transformer_model.layers[0]
# Build your model input
input_ids = Input(shape=(max_length,), name='input_ids', dtype='int32')
inputs = {'input_ids': input_ids}
# Load the Transformers BERT model as a layer in a Keras model
bert_model = bert(inputs)[1]
dropout = Dropout(config.hidden_dropout_prob, name='pooled_output')
pooled_output = dropout(bert_model, training=False)
# Then build your model output
product = Dense(8, kernel_initializer=TruncatedNormal(stddev=config.initializer_range), name='product')(pooled_output)
outputs = {'product': product}
# And combine it all in a model object
model = Model(inputs=inputs, outputs=outputs, name='BERT_MultiLabel_MultiClass')
# Take a look at the model
model.summary()
# Set an optimizer
optimizer = Adam()
# Set loss and metrics
loss = {'product': CategoricalCrossentropy(from_logits = True)}
metric = {'product': CategoricalAccuracy('accuracy')}
# Compile the model
model.compile(
    optimizer=optimizer,
    loss=loss,
    metrics=metric)
# Ready output data for the model
y_train = to_categorical(data_train['label'],8)
y_test = to_categorical(data_test['label'],8)
x_train = tokenizer(
    text=data_train['Consumer_Complaint'].to_list(),
    add_special_tokens=True,
    max_length=max_length,
    truncation=True,
    padding=True,
    return_tensors='tf',
    return_token_type_ids=False,
    return_attention_mask=False,
    verbose=True)
x_test = tokenizer(
    text=data_test['Consumer_Complaint'].to_list(),
    add_special_tokens=True,
    max_length=max_length,
    truncation=True,
    padding=True,
    return_tensors='tf',
    return_token_type_ids=False,
    return_attention_mask=False,
    verbose=True)
# Fit the model
history = model.fit(
    x={'input_ids': x_train['input_ids']},
    y={'product': y_train},
    validation_split=0.2,
    batch_size=64,
    epochs=1)
### ----- Evaluate the model ------ ###
model_eval = model.evaluate(
    x={'input_ids': x_test['input_ids']},
    y={'product': y_test})
print("This is evaluation: ", model_eval)
accr = model.evaluate(x_test['input_ids'],y_test)
print('Test set\n Loss: {:0.3f}\n Accuracy: {:0.3f}'.format(accr[0],accr[1]))
from matplotlib import pyplot as plt
plt.title('Loss')
plt.plot(history.history['loss'], label='train')
plt.plot(history.history['val_loss'], label='test')
plt.legend()
plt.show();
# plot loss and accuracy
metrics = [k for k in history.history.keys() if ("loss" not in k) and ("val" not in k)]
fig, ax = plt.subplots(nrows=1, ncols=2, sharey=True)
ax[0].set(title="Training")
ax11 = ax[0].twinx()
ax[0].plot(history.history['loss'], color='black')
ax[0].set_xlabel('Epochs')
ax[0].set_ylabel('Loss', color='black')
for metric in metrics:
    ax11.plot(history.history[metric], label=metric)
ax11.set_ylabel("Score", color='steelblue')
ax11.legend()
ax[1].set(title="Validation")
ax22 = ax[1].twinx()
ax[1].plot(history.history['val_loss'], color='black')
ax[1].set_xlabel('Epochs')
ax[1].set_ylabel('Loss', color='black')
for metric in metrics:
    ax22.plot(history.history['val_'+metric], label=metric)
ax22.set_ylabel("Score", color="steelblue")
plt.show()
#Testing our model on the test data.
predicted_raw = model.predict({'input_ids':x_test['input_ids']})
print(type(predicted_raw))
predicted_raw=list(predicted_raw)
predicted_raw=numpy.array(predicted_raw)
y_predicted = numpy.argmax(predicted_raw, axis = 1)
y_true = data_test.label
from sklearn.metrics import accuracy_score,confusion_matrix,classification_report
confusionmatrix = confusion_matrix(y_predicted,y_true)
I am trying to get the confusion matrix and classification report working.
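For reference, a likely cause (assuming TensorFlow 2.x behaviour): the model is built with outputs = {'product': product}, so model.predict returns a dict rather than an array, and list(predicted_raw) yields the dict keys, a 1-D array, which is exactly why argmax(axis=1) complains that axis 1 is out of bounds. A minimal sketch of the fix under that assumption:
# pull the logits out of the prediction dict before taking the argmax over classes
predicted_raw = model.predict({'input_ids': x_test['input_ids']})
logits = predicted_raw['product']            # shape (n_samples, 8)
y_predicted = numpy.argmax(logits, axis=1)   # one class index per sample
y_true = data_test['label'].to_numpy()
from sklearn.metrics import confusion_matrix, classification_report
print(confusion_matrix(y_true, y_predicted))
print(classification_report(y_true, y_predicted))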

Related

Tuple index out of range when trying to fit the CNN model

The dataset that I am using is the standard chest X-ray dataset (https://www.kaggle.com/datasets/paultimothymooney/chest-xray-pneumonia). I have been getting this error (tuple index out of range) while fitting the CNN model. Is there a way to circumvent this issue? I suppose the "validation_data" argument needs to be amended in some way.
import os
import glob
import cv2
import numpy as np
import pandas as pd
from PIL import Image
import tensorflow as tf
import random
#from pathlib import path
import pathlib2 as pathlib
from pathlib2 import Path
#from keras.models import sequential, Model, load_model
from tensorflow.keras.models import Sequential, Model, load_model
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Dropout, Input, Flatten, Activation
from tensorflow.keras.optimizers import Adam, SGD, RMSprop
from tensorflow.keras.callbacks import Callback, EarlyStopping
from tensorflow.keras.utils import to_categorical
from sklearn.metrics import confusion_matrix
from tensorflow.keras import backend as K
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing import image
%matplotlib inline
import shutup; shutup.please()
# DATA PATH #
print (os.listdir("C:/Users/Syd_R/OneDrive/Desktop/Peeumonia_data/archive/chest_xray/chest_xray/"))
data_dir = Path("C:/Users/Syd_R/OneDrive/Desktop/Peeumonia_data/archive/chest_xray/chest_xray/")
train_dir = data_dir/'train'
val_dir = data_dir/'val'
test_dir = data_dir/'test'
# LOAD TRAINING DATA TO DATAFRAME #
def load_train():
    normal_cases_dir = train_dir/'NORMAL'
    pneumonia_cases_dir = train_dir/'PNEUMONIA'
    # list of all images
    normal_cases = normal_cases_dir.glob('*.jpeg')
    pneumonia_cases = pneumonia_cases_dir.glob('*.jpeg')
    train_data = []
    train_label = []
    for img in normal_cases:
        train_data.append(img)
        train_label.append('NORMAL')
    for img in pneumonia_cases:
        train_data.append(img)
        train_label.append('PNEUMONIA')
    df = pd.DataFrame(train_data)
    df.columns = ['images']
    df['labels'] = train_label
    df = df.sample(frac=1).reset_index(drop=True)
    return df
train_data = load_train()
train_data.shape
# VIZUALIZE THE AMOUNT OF TRAINING DATA WITH LABELS #
plt.bar(train_data['labels'].value_counts().index,train_data['labels'].value_counts().values)
plt.show()
# VIZUALIZE THE TRAINING IMAGE DATA BY RANDOM SAMPLING#
plt.figure(figsize=(10,5))
for i in range(10):
    ax = plt.subplot(2, 5, i+1)
    num = random.randint(0, 5000+i)
    im = train_data.loc[num].at['images']
    im1 = train_data.loc[num].at['labels']
    img = cv2.imread(str(im))
    img = cv2.resize(img, (224, 224))
    plt.imshow(img)
    plt.title(im1)
    plt.axis("off")
    print(num)
# DATA PRE-PROCESSING #
def prepare_and_load(isval=True):
    if isval == True:
        normal_dir = val_dir/'NORMAL'
        pneumonia_dir = val_dir/'PNEUMONIA'
    else:
        normal_dir = test_dir/'NORMAL'
        pneumonia_dir = test_dir/'PNEUMONIA'
    normal_cases = normal_dir.glob('*.jpeg')
    pneumonia_cases = pneumonia_dir.glob('*.jpeg')
    data, labels = ([] for x in range(2))
    def prepare(case):
        for img in case:
            img = cv2.imread(str(img))
            img = cv2.resize(img, (224, 224))
            if img.shape[2] == 1:
                img = np.dstack([img, img, img])
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img = img.astype(np.float32)/255
            if case == normal_cases:
                label = to_categorical(0, num_classes=2)
            else:
                label = to_categorical(1, num_classes=2)
            data.append(img)
            labels.append(label)
        return data, labels
    prepare(normal_cases)
    d, l = prepare(pneumonia_cases)
    d = np.array(d)
    l = np.array(1)
    return d, l
val_data,val_labels = prepare_and_load(isval=True)
test_data,test_labels = prepare_and_load(isval=False)
print('Number of test images -->', len(test_data))
print('Number of validation images -->', len(val_data))
# DEFINE A FUNCTION TO GENERATE BATCHES FROM TRAINING IMAGES #
def data_gen(data, batch_size):
    # Get total number of samples in the data
    n = len(data)
    steps = n//batch_size
    # Define two numpy arrays for containing batch data and labels
    batch_data = np.zeros((batch_size, 224, 224, 3), dtype=np.float32)
    batch_labels = np.zeros((batch_size, 2), dtype=np.float32)
    # Get a numpy array of all the indices of the input data
    indices = np.arange(n)
    # Initialize a counter
    i = 0
    while True:
        np.random.shuffle(indices)
        # Get the next batch
        count = 0
        next_batch = indices[(i*batch_size):(i+1)*batch_size]
        for j, idx in enumerate(next_batch):
            img_name = data.iloc[idx]['images']
            label = data.iloc[idx]['images']
            if label == 'NORMAL':
                label = 0
            else:
                label = 1
            # one hot encoding
            encoded_label = to_categorical(label, num_classes=2)
            # read the image and resize
            img = cv2.imread(str(img_name))
            img = cv2.resize(img, (224, 224))
            # check if it's grayscale
            if img.shape[2] == 1:
                img = np.dstack([img, img, img])
            # cv2 reads in BGR mode by default
            orig_imag = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            # normalize the image pixels
            orig_img = img.astype(np.float32)/255
            batch_data[count] = orig_img
            batch_labels[count] = encoded_label
            count += 1
            if count == batch_size-1:
                break
        i += 1
        yield batch_data, batch_labels
        if i >= steps:
            i = 0
# DEFINE THE CNN MODEL #
model = Sequential()
model.add(Conv2D(32, (3,3), input_shape=(224, 224, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Conv2D(32, (3,3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Conv2D(64, (3,3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Flatten()) # this converts our 3D feature maps to 1D feature vectors
model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dense(2))
model.add(Activation('softmax'))
# DEFINE PARAMETERS FOR THE CNN MODEL #
batch_size = 64
nb_epochs = 3
# Get a train data generator
train_data_gen = data_gen(data= train_data, batch_size=batch_size)
# DEFINE THE NUMBER OF TRAINING STEPS #
nb_train_steps = train_data.shape[0]//batch_size
print("Number of training and validation steps: {} and {}".format(nb_train_steps, len(val_data)))
model.compile(loss='binary_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])
# FIT THE MODEL #
history = model.fit_generator(train_data_gen,
                              epochs=nb_epochs,
                              steps_per_epoch=nb_train_steps,
                              validation_data=(val_data, val_labels))
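For reference, two hedged guesses at the cause rather than a verified fix: in prepare_and_load, l = np.array(1) turns the returned labels into a 0-d scalar array, so validation_data=(val_data, val_labels) hands Keras a label array with no batch dimension, which matches a "tuple index out of range" during fitting; and in data_gen, the label is read from the 'images' column instead of 'labels'. A minimal sketch of the corrected lines:
# in prepare_and_load: convert the label list, not the literal 1
d = np.array(d)
l = np.array(l)
# in data_gen: read the label from the 'labels' column
img_name = data.iloc[idx]['images']
label = data.iloc[idx]['labels']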

How to calculate Precision, Recall and F1 score using sklearn

I am trying to calculate the Precision, Recall and F1 score in this sample code. I have calculated the accuracy of the model on the train and test datasets. Kindly help me calculate these metrics.
Please look at the code; I have commented every important line for explanation.
# develop a classifier for the Faces Dataset
from numpy import load
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import Normalizer
from sklearn.svm import SVC
import pickle
# load dataset
data = load('faces-embeddings.npz')
trainX, trainy, testX, testy = data['arr_0'], data['arr_1'], data['arr_2'], data['arr_3']
print('Dataset: train=%d, test=%d' % (trainX.shape[0], testX.shape[0]))
# normalize input vectors
in_encoder = Normalizer(norm='l2')
trainX = in_encoder.transform(trainX)
testX = in_encoder.transform(testX)
# label encode targets
out_encoder = LabelEncoder()
out_encoder.fit(trainy)
trainy = out_encoder.transform(trainy)
testy = out_encoder.transform(testy)
# fit model
model = SVC(kernel='linear', probability=True)
model.fit(trainX, trainy)
#Saving Model
filename = 'finalized_model.sav'
pickle.dump(model, open(filename, 'wb'))
# predict
yhat_train = model.predict(trainX)
yhat_test = model.predict(testX)
# score
score_train = accuracy_score(trainy, yhat_train)
score_test = accuracy_score(testy, yhat_test)
# summarize
print('Accuracy: train=%.3f, test=%.3f' % (score_train*100, score_test*100))
Knowing the true value of Y (trainy here) and the predicted value of Y (yhat_train here), you can directly compute the precision, recall and F1 score, exactly as you did for the accuracy (thanks to sklearn.metrics):
sklearn.metrics.precision_score(trainy,yhat_train)
https://scikit-learn.org/stable/modules/generated/sklearn.metrics.precision_score.html#sklearn.metrics.precision_score
sklearn.metrics.recall_score(trainy,yhat_train)
https://scikit-learn.org/stable/modules/generated/sklearn.metrics.recall_score.html#sklearn.metrics.recall_score
sklearn.metrics.f1_score(trainy,yhat_train)
https://scikit-learn.org/stable/modules/generated/sklearn.metrics.f1_score.html#sklearn.metrics.f1_score
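One caveat worth adding: if the face embeddings cover more than two identities, the targets are multiclass, and these three functions default to average='binary', which raises an error in that case. Pass an averaging strategy explicitly; a minimal sketch using 'macro' averaging ('micro' or 'weighted' work as well):
from sklearn.metrics import precision_score, recall_score, f1_score
# average='macro' computes the metric per class, then takes the unweighted mean
precision = precision_score(trainy, yhat_train, average='macro')
recall = recall_score(trainy, yhat_train, average='macro')
f1 = f1_score(trainy, yhat_train, average='macro')
print('Precision=%.3f, Recall=%.3f, F1=%.3f' % (precision, recall, f1))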

How to integrate LIME with PyTorch?

Using this MNIST image classification model:
%reset -f
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import torch.utils.data as data_utils
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_moons
from matplotlib import pyplot
from pandas import DataFrame
import torchvision.datasets as dset
import os
import torch.nn.functional as F
import time
import random
import pickle
from sklearn.metrics import confusion_matrix
import pandas as pd
import sklearn
trans = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (1.0,))])
root = './data'
if not os.path.exists(root):
    os.mkdir(root)
train_set = dset.MNIST(root=root, train=True, transform=trans, download=True)
test_set = dset.MNIST(root=root, train=False, transform=trans, download=True)
batch_size = 64
train_loader = torch.utils.data.DataLoader(
    dataset=train_set,
    batch_size=batch_size,
    shuffle=True)
test_loader = torch.utils.data.DataLoader(
    dataset=test_set,
    batch_size=batch_size,
    shuffle=True)
class NeuralNet(nn.Module):
    def __init__(self):
        super(NeuralNet, self).__init__()
        self.fc1 = nn.Linear(28*28, 500)
        self.fc2 = nn.Linear(500, 256)
        self.fc3 = nn.Linear(256, 2)

    def forward(self, x):
        x = x.view(-1, 28*28)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
num_epochs = 2
random_sample_size = 200
values_0_or_1 = [t for t in train_set if (int(t[1]) == 0 or int(t[1]) == 1)]
values_0_or_1_testset = [t for t in test_set if (int(t[1]) == 0 or int(t[1]) == 1)]
print(len(values_0_or_1))
print(len(values_0_or_1_testset))
train_loader_subset = torch.utils.data.DataLoader(
    dataset=values_0_or_1,
    batch_size=batch_size,
    shuffle=True)
test_loader_subset = torch.utils.data.DataLoader(
    dataset=values_0_or_1_testset,
    batch_size=batch_size,
    shuffle=False)
train_loader = train_loader_subset
# Hyper-parameters
input_size = 100
hidden_size = 100
num_classes = 2
# learning_rate = 0.00001
learning_rate = .0001
# Device configuration
device = 'cpu'
print_progress_every_n_epochs = 1
model = NeuralNet().to(device)
# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
N = len(train_loader)
# Train the model
total_step = len(train_loader)
most_recent_prediction = []
test_actual_predicted_dict = {}
rm = random.sample(list(values_0_or_1), random_sample_size)
train_loader_subset = data_utils.DataLoader(rm, batch_size=4)
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader_subset):
        # Move tensors to the configured device
        images = images.reshape(-1, 2).to(device)
        labels = labels.to(device)
        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    if (epoch) % print_progress_every_n_epochs == 0:
        print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, i+1, total_step, loss.item()))
predicted_test = []
model.eval() # eval mode (batchnorm uses moving mean/variance instead of mini-batch mean/variance)
probs_l = []
predicted_values = []
actual_values = []
labels_l = []
with torch.no_grad():
    for images, labels in test_loader_subset:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        predicted_test.append(predicted.cpu().numpy())
        sm = torch.nn.Softmax()
        probabilities = sm(outputs)
        probs_l.append(probabilities)
        labels_l.append(labels.cpu().numpy())
    predicted_values.append(np.concatenate(predicted_test).ravel())
    actual_values.append(np.concatenate(labels_l).ravel())
    if (epoch) % 1 == 0:
        print('test accuracy : ', 100 * len((np.where(np.array(predicted_values[0]) == (np.array(actual_values[0])))[0])) / len(actual_values[0]))
I'm attempting to integrate 'Local Interpretable Model-Agnostic Explanations for machine learning classifiers': https://marcotcr.github.io/lime/
It appears PyTorch support is not built in, as it is not mentioned in the docs or in the following tutorial:
https://marcotcr.github.io/lime/tutorials/Tutorial%20-%20images.html
With my updated code for PyTorch:
from lime import lime_image
import time
explainer = lime_image.LimeImageExplainer()
explanation = explainer.explain_instance(images[0].reshape(28,28), model(images[0]), top_labels=5, hide_color=0, num_samples=1000)
Causes this error:
/opt/conda/lib/python3.6/site-packages/skimage/color/colorconv.py in gray2rgb(image, alpha)
830 is_rgb = False
831 is_alpha = False
--> 832 dims = np.squeeze(image).ndim
833
834 if dims == 3:
AttributeError: 'Tensor' object has no attribute 'ndim'
So it appears a TensorFlow object is expected here?
How do I integrate LIME with PyTorch image classification?
Here's my solution:
Lime expects an image input of type numpy. This is why you get the attribute error, and a solution is to convert the image (from Tensor) to numpy before passing it to the explainer object. Another option is to select a specific image with the test_loader_subset and convert it with img = img.numpy().
Secondly, in order to make LIME work with PyTorch (or any other framework), you'll need to specify a batch prediction function which outputs the prediction scores of each class for each image. The name of this function (here I've called it batch_predict) is then passed to explainer.explain_instance(img, batch_predict, ...). batch_predict needs to loop through all images passed to it, convert them to Tensors, make a prediction, and finally return the prediction scores as numpy values. This is how I got it working; a sketch follows below.
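A minimal sketch of such a batch_predict for the model above (the names and the channel handling are illustrative; LIME is assumed to hand over a numpy batch of shape (N, H, W, C), and img is a numpy image as described above):
def batch_predict(images):
    model.eval()
    # stack the numpy images into one float tensor
    batch = torch.stack([torch.as_tensor(img, dtype=torch.float32) for img in images])
    # this model flattens with view(-1, 28*28), so collapse a trailing
    # channel dimension first if LIME delivers 3-channel images
    if batch.dim() == 4:
        batch = batch.mean(dim=3)
    with torch.no_grad():
        logits = model(batch)
        probs = F.softmax(logits, dim=1)
    # return one row of class scores per image, as numpy
    return probs.numpy()

explanation = explainer.explain_instance(img, batch_predict,
                                         top_labels=5, hide_color=0,
                                         num_samples=1000)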
Note also that the images need to have shape (..., ..., 3) or (..., ..., 1) in order to be properly segmented by the default segmentation algorithm. This means that you might have to use np.transpose(img, (...)). You may specify the segmentation algorithm as well if the results are poor.
Finally you'll need to display the LIME image mask on top of the original image. This snippet shows how this may be done:
from skimage.segmentation import mark_boundaries
temp, mask = explanation.get_image_and_mask(explanation.top_labels[0], positive_only=False, num_features=5, hide_rest=False)
img_boundry = mark_boundaries(temp, mask)
plt.imshow(img_boundry)
plt.show()
This notebook is a good reference:
https://github.com/marcotcr/lime/blob/master/doc/notebooks/Tutorial%20-%20images%20-%20Pytorch.ipynb

ValueError: Can only tuple-index with a MultiIndex

For a multilabel classification problem I am trying to plot the precision and recall curve.
The sample code is taken from https://scikit-learn.org/stable/auto_examples/model_selection/plot_precision_recall.html#sphx-glr-auto-examples-model-selection-plot-precision-recall-py under the section "Create multi-label data, fit, and predict".
I am trying to fit it into my code, but I get "ValueError: Can only tuple-index with a MultiIndex" when I run the code below.
train_df.columns.values
array(['DefId', 'DefectCount', 'SprintNo', 'ReqName', 'AreaChange',
       'CodeChange', 'TestSuite'], dtype=object)
TestSuite is the value to be predicted.
X_train = train_df.drop("TestSuite", axis=1)
Y_train = train_df["TestSuite"]
X_test = test_df.drop("DefId", axis=1).copy()
classes --> I have hardcoded the TestSuite values
from sklearn.preprocessing import label_binarize
# Use label_binarize to be multi-label like settings
Y = label_binarize(Y_train, classes=np.array([0, 1, 2, 3, 4]))
n_classes = Y.shape[1]
# We use OneVsRestClassifier for multi-label prediction
from sklearn.multiclass import OneVsRestClassifier
# Run classifier
classifier = OneVsRestClassifier(svm.LinearSVC(random_state=3))
classifier.fit(X_train, Y_train)
y_score = classifier.decision_function(X_test)
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import average_precision_score
import pandas as pd
# For each class
precision = dict()
recall = dict()
average_precision = dict()
#n_classes = Y.shape[1]
for i in range(n_classes):
    precision[i], recall[i], _ = precision_recall_curve(Y_train[:, i], y_score[:, i])
    average_precision[i] = average_precision_score(Y_train[:, i], y_score[:, i])
Input data --> values have been categorised
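For reference, the immediate error comes from Y_train[:, i]: Y_train is a pandas Series, and tuple-indexing a Series raises "ValueError: Can only tuple-index with a MultiIndex". The binarized numpy array Y built above is presumably what the loop was meant to index; a minimal sketch under that assumption:
# index the binarized numpy array Y, not the pandas Series Y_train
for i in range(n_classes):
    precision[i], recall[i], _ = precision_recall_curve(Y[:, i], y_score[:, i])
    average_precision[i] = average_precision_score(Y[:, i], y_score[:, i])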

Input size (depth of inputs) must be accessible via shape inference, but saw value None error when trying to set tf.expand_dims axis to 0

I am trying to use the 20 newsgroups dataset available in sklearn to train an LSTM to do incremental learning (classification). I used sklearn's TfidfVectorizer to pre-process the data, then turned the resulting sparse matrix into a numpy array before feeding it. After that, when running the line below:
outputs, final_state = tf.nn.dynamic_rnn(cell, inputs_, initial_state=initial_state)
It gave an error saying that 'inputs_' should have 3 dimensions, so I used:
inputs_ = tf.expand_dims(inputs_, 0)
to expand the dimensions. But when I do that I get the error:
ValueError: Input size (depth of inputs) must be accessible via shape inference, but saw value None.
The shape of 'inputs_' is:
(1, 134410)
I already went through this post, but it did not help.
I cannot seem to understand how to solve this issue. Any help is much appreciated. Thank you in advance!
Shown below is my complete code:
import os
from collections import Counter
import tensorflow as tf
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.datasets import fetch_20newsgroups
import matplotlib as mplt
from matplotlib import cm
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
from sklearn.metrics import f1_score, recall_score, precision_score
from string import punctuation
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
def pre_process():
    newsgroups_data = fetch_20newsgroups(subset='all', remove=('headers', 'footers', 'quotes'))
    vectorizer = TfidfVectorizer()
    features = vectorizer.fit_transform(newsgroups_data.data)
    lb = LabelBinarizer()
    labels = np.reshape(newsgroups_data.target, [-1])
    labels = lb.fit_transform(labels)
    return features, labels

def get_batches(x, y, batch_size=1):
    for ii in range(0, len(y), batch_size):
        yield x[ii:ii + batch_size], y[ii:ii + batch_size]

def plot_error(errorplot, datapoint, numberOfWrongPreds):
    errorplot.set_xdata(np.append(errorplot.get_xdata(), datapoint))
    errorplot.set_ydata(np.append(errorplot.get_ydata(), numberOfWrongPreds))
    errorplot.autoscale(enable=True, axis='both', tight=None)
    plt.draw()

def train_test():
    features, labels = pre_process()
    # Defining hyperparameters
    epochs = 1
    lstm_layers = 1
    batch_size = 1
    lstm_size = 30
    learning_rate = 0.003
    print(lstm_size)
    print(batch_size)
    print(epochs)
    # --------------placeholders-------------------------------------
    # Create the graph object
    graph = tf.Graph()
    # Add nodes to the graph
    with graph.as_default():
        tf.set_random_seed(1)
        inputs_ = tf.placeholder(tf.float32, [None, None], name="inputs")
        # labels_ = tf.placeholder(dtype=tf.int32)
        labels_ = tf.placeholder(tf.int32, [None, None], name="labels")
        # getting dynamic batch size according to the input tensor size
        # dynamic_batch_size = tf.shape(inputs_)[0]
        # output_keep_prob is the dropout added to the RNN's outputs; the dropout
        # has no effect on the calculation of the subsequent states.
        keep_prob = tf.placeholder(tf.float32, name="keep_prob")
        # Your basic LSTM cell
        lstm = tf.contrib.rnn.BasicLSTMCell(lstm_size)
        # Add dropout to the cell
        drop = tf.contrib.rnn.DropoutWrapper(lstm, output_keep_prob=keep_prob)
        # Stack up multiple LSTM layers, for deep learning
        cell = tf.contrib.rnn.MultiRNNCell([drop] * lstm_layers)
        # Getting an initial state of all zeros
        initial_state = cell.zero_state(batch_size, tf.float32)
        inputs_ = tf.expand_dims(inputs_, 0)
        outputs, final_state = tf.nn.dynamic_rnn(cell, inputs_, initial_state=initial_state)
        # hidden layer
        hidden = tf.layers.dense(outputs[:, -1], units=25, activation=tf.nn.relu)
        logit = tf.contrib.layers.fully_connected(hidden, 1, activation_fn=None)
        cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logit, labels=labels_))
        optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)
        saver = tf.train.Saver()
    # ----------------------------online training-----------------------------------------
    with tf.Session(graph=graph) as sess:
        tf.set_random_seed(1)
        sess.run(tf.global_variables_initializer())
        iteration = 1
        state = sess.run(initial_state)
        wrongPred = 0
        errorplot, = plt.plot([], [])
        for ii, (x, y) in enumerate(get_batches(features, labels, batch_size), 1):
            feed = {inputs_: x.toarray(),
                    labels_: y,
                    keep_prob: 0.5,
                    initial_state: state}
            predictions = tf.round(tf.nn.softmax(logit)).eval(feed_dict=feed)
            print("----------------------------------------------------------")
            print("Iteration: {}".format(iteration))
            print("Prediction: ", predictions)
            print("Actual: ", y)
            pred = np.array(predictions)
            print(pred)
            print(y)
            if not ((pred == y).all()):
                wrongPred += 1
            if ii % 27 == 0:
                plot_error(errorplot, ii, wrongPred)
            loss, states, _ = sess.run([cost, final_state, optimizer], feed_dict=feed)
            print("Train loss: {:.3f}".format(loss))
            iteration += 1
        saver.save(sess, "checkpoints/sentiment.ckpt")
        errorRate = wrongPred/len(labels)
        print("ERROR RATE: ", errorRate)

if __name__ == '__main__':
    train_test()
ValueError: Input size (depth of inputs) must be accessible via shape inference, but saw value None.
This error is raised because neither the size nor the number of inputs is specified; tf.nn.dynamic_rnn needs the last dimension of its input (the input depth) to be statically known.
I got the script working like this:
inputs_ = tf.placeholder(tf.float32, [1, None], name="inputs")
inputs_withextradim = tf.expand_dims(inputs_, 2)
outputs, final_state = tf.nn.dynamic_rnn(cell, inputs_withextradim, initial_state=initial_state)
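A quick sanity check of why this works (TF 1.x shape semantics): the placeholder is declared as [1, None], and expanding on axis 2 yields shape [1, None, 1], so the input depth (the last dimension) is statically 1 and dynamic_rnn's shape inference succeeds:
print(inputs_.get_shape().as_list())              # [1, None]
print(inputs_withextradim.get_shape().as_list())  # [1, None, 1]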
