Logistic Regression Cost = nan - python-3.x

I am trying to implement a logistic regression model but keep getting 'nan' values as the cost. I tried it with multiple data sets but it gives the same result. Different sources give slightly different implementations of gradient descent, so I am not sure whether the gradient implementation is correct here. Here is the full code:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split
class LogisticRegression:
def __init__(self, lr=0.001, n_iter=8000):
self.lr = lr
self.n_iter = n_iter
self.weights = None
"""
z is dot product of features and weights, which is then mapped to discrete values, such as between 0 and 1
"""
def sigmoid(self, z):
return 1.0/(1+np.exp(-z))
def predict(self, x_features, weights):
"""Returns 1d array of probabilities that the class label == 1"""
z = np.dot(x_features, weights)
return self.sigmoid(z)
def cost(self, x_features, labels, weights):
"""
Using Mean Absolute Error
Cost = (labels*log(predictions) + (1-labels)*log(1-predictions) ) / len(labels)
"""
observation = len(labels)
predictions = self.predict(x_features, weights)
#take the error when label = 1
class1_cost = -labels*np.log(predictions)
#take the error when label = 0
class2_cost = (1-labels)*np.log(1-predictions)
#take sum of both the cost
cost = class1_cost+class2_cost
#take the average cost
cost = cost.sum()/observation
return cost
def update_weight(self, x_features, labels, weights):
"""
Vectorized Gradient Descent
"""
N = len(x_features)
#get predictions (approximation of y)
predictions = self.predict(x_features, weights)
gradient = np.dot(x_features.T, predictions-labels)
#take the average cost of derivative for each feature
gradient /= N
#multiply gradients by our learning rate
gradient *= self.lr
#subtract from our weights to minimize cost
weights -= gradient
return weights
def give_predictions(self, x_features, weights):
linear_model_prediction = self.predict(x_features, weights)
y_predicted_cls = [1 if i>0.5 else 0 for i in linear_model_prediction]
return y_predicted_cls
def train(self, features, labels):
n_samples, n_features = features.shape
self.weights = np.zeros((n_features,1)) #initialize the weight matrix
cost_history = []
for i in range(self.n_iter):
self.weights = self.update_weight(features, labels, self.weights)
#calculate error for auditing purposes
cost = self.cost(features, labels, self.weights)
cost_history.append(cost)
#Log process
if i%1000 == 0:
print("iter: {}, cost: {}".format(str(i),str(cost)))
return self.weights, cost_history
def generate_data():
bc = datasets.load_breast_cancer()
x_features, labels = bc.data, bc.target
x_train, x_test, y_train, y_test = train_test_split(x_features, labels, test_size=0.2, random_state=1234)
return x_train, x_test, y_train, y_test
x_train, x_test, y_train, y_test = generate_data()
model = LogisticRegression()
model.train(x_train, y_train)

I had to apply feature scaling to x_train before training the model. I used sklearn's StandardScaler:
from sklearn.preprocessing import StandardScaler
sc_X = StandardScaler()
x_train = sc_X.fit_transform(x_train)
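
For completeness, here is a minimal sketch of how that scaling might be wired into the training code above (my wiring, not from the original post); the key point is that the test set is transformed with the statistics fitted on the training set:

from sklearn.preprocessing import StandardScaler

sc_X = StandardScaler()
x_train = sc_X.fit_transform(x_train)  # fit the scaler on the training data only
x_test = sc_X.transform(x_test)        # reuse the training statistics for the test set

model = LogisticRegression()
model.train(x_train, y_train)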

Your cost function seems correct, but you need to have 'y' as a vector of zeros and ones (one-hot encoded).
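
One way to read that (my interpretation, not spelled out in the answer above): the weights are initialised with shape (n_features, 1), so the predictions come out with shape (n_samples, 1), while y_train from load_breast_cancer is a flat (n_samples,) array. Reshaping the labels into a matching column vector before training avoids a silent broadcast into an (n_samples, n_samples) matrix:

y_train_col = y_train.reshape(-1, 1)  # (n_samples,) -> (n_samples, 1), matches predictions
model = LogisticRegression()
model.train(x_train, y_train_col)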

Related

How to check accuracy on BCELoss Pytorch?

I'm trying to use PyTorch to take a HeartDisease.csv and predict whether the patient has heart disease or not... the .csv provides 13 inputs and 1 target.
I'm using BCELoss and I'm having trouble understanding how to write an accuracy check function.
My num_samples is correct but not my num_correct. I think this is a result of not understanding the predictions tensor. Right now my num_correct is usually over 8000 while my num_samples is 303...
Any insight on how to write this accuracy check function is much appreciated.
I wrote this in Google Colab.
#imports
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import pandas as pd
#create fully connected network
class NN(nn.Module):
def __init__(self, input_size, num_classes):
super(NN, self).__init__()
self.outputs = nn.Linear(input_size, 1)
def forward(self, x):
x = self.outputs(x)
return torch.sigmoid(x)
#set device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
#hyperparameters
input_size = 13 # 13 inputs
num_classes = 1 # heartdisease or not
learning_rate = 0.001
batch_size = 64
num_epochs = 1
#load data
class MyDataset(Dataset):
def __init__(self, root, n_inp):
self.df = pd.read_csv(root)
self.data = self.df.to_numpy()
self.x , self.y = (torch.from_numpy(self.data[:,:n_inp]),
torch.from_numpy(self.data[:,n_inp:]))
def __getitem__(self, idx):
return self.x[idx, :], self.y[idx,:]
def __len__(self):
return len(self.data)
train_dataset = MyDataset("heart.csv", input_size)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle =True)
test_dataset = MyDataset("heart.csv", input_size)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle =True)
#initialize network
model = NN(input_size=input_size, num_classes=num_classes).to(device)
#loss and optimizer
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
#train network
for epoch in range(num_epochs):
for batch_idx, (data, targets) in enumerate(train_loader):
#get data to cuda if possible
data = data.to(device=device)
targets = targets.to(device=device)
#forward
scores = model(data.float())
targets = targets.float()
loss = criterion(scores, targets)
#backward
optimizer.zero_grad()
loss.backward()
#grad descent or adam step
optimizer.step()
#check accuracy of model
def check_accuracy(loader, model):
num_correct = 0
num_samples = 0
model.eval()
with torch.no_grad():
for x, y in loader:
x = x.to(device=device)
y = y.to(device=device)
scores = model(x.float())
_, predictions = scores.max(1)
num_correct += (predictions == y).sum()
num_samples += predictions.size(0)
print("Got {} / {} with accuracy {}".format(num_correct, num_samples, float(num_correct)/float(num_samples)*100))
model.train()
print("checking accuracy on training data")
check_accuracy(train_loader, model)
print("checking accuracy on test data")
check_accuracy(test_loader, model)
Note: Don't fool yourself. A single linear layer + a sigmoid + BCE loss = logistic regression. This is a linear model, so just take note of that when referring to it as a "neural network", which is a term usually reserved for similar networks but with at least one hidden layer and nonlinear activations.
The sigmoid layer at the end of your model's forward() function returns an (N,1)-sized tensor, where N is the batch size. In other words, it returns a scalar for every data point. Each scalar is a value between 0 and 1 (this is the range of the sigmoid function).
The idea is to interpret those scalars as probabilities corresponding to the positive class. Suppose 1 corresponds to heart disease, and 0 corresponds to no heart disease; heart disease is the positive class, and no heart disease is the negative class. Now suppose a score is 0.6. This might be interpreted as a 60% chance that the associated label is heart disease, and a 40% chance that the associated label is no heart disease. This interpretation of the sigmoid output is what motivates the BCE loss to begin with (it's ultimately just a negative log likelihood).
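As a quick illustration of that last point (my addition, not from the original answer), BCELoss on a sigmoid output is just the averaged negative log likelihood of the labels:

import torch
import torch.nn.functional as F

p = torch.tensor([0.6, 0.2, 0.9])  # sigmoid outputs, read as P(heart disease)
y = torch.tensor([1.0, 0.0, 1.0])  # true labels
manual = -(y * torch.log(p) + (1 - y) * torch.log(1 - p)).mean()
print(manual, F.binary_cross_entropy(p, y))  # both print the same value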
So what you might do is check if your scores are greater than 0.5. If so, predict heart disease. If not, predict no heart disease.
Right now, you're computing maximums from the scores across dimension 1, which does nothing because dimension 1 is already of size 1; taking the maximum of a single value simply gives you that value.
Try something like this:
def check_accuracy(loader, model):
num_correct = 0
num_samples = 0
model.eval()
with torch.no_grad():
for x, y in loader:
x = x.to(device=device)
y = y.to(device=device)
scores = model(x.float())
# Create a Boolean tensor (True for scores > 0.5, False otherwise)
# and then cast it to a long tensor (True -> 1, False -> 0)
predictions = (scores > 0.5).long()
num_correct += (predictions == y).sum()
num_samples += predictions.size(0)
print("Got {} / {} with accuracy {}".format(num_correct, num_samples, float(num_correct)/float(num_samples)*100))
model.train()
You may also want to squeeze your prediction and target tensors to size (N) instead of (N,1), though I'm not sure it's necessary in your case.
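If the shapes do turn out to matter, a minimal sketch of that squeeze inside the accuracy loop (my addition, assuming the same variable names as above):

predictions = (scores > 0.5).long().squeeze(1)  # (N, 1) -> (N)
y = y.squeeze(1)                                # targets to (N) as well
num_correct += (predictions == y).sum()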

How can the coefficients of the multiclass logistic regression model be used to predict the probabilities of class membership of observations?

I am trying to solve a problem that resembles Fisher's iris classification. The problem is that I can train the model on my computer, but the model then has to predict class membership on a computer where it is impossible to install Python and scikit-learn. I want to understand how, given the coefficients of the logistic regression model, I can predict class membership without using the model's predict method.
Using the Fisher problem as an example, I do the following.
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.metrics import accuracy_score, f1_score
# data preparation
iris = load_iris()
data = pd.DataFrame(data=np.hstack([iris.data, iris.target[:, np.newaxis]]),
columns=iris.feature_names + ['target'])
names = data.columns
# split data
X_train, X_test, y_train, y_test = train_test_split(data[names[:-1]], data[names[-1]], random_state=42)
# train model
cls = make_pipeline(
StandardScaler(),
LogisticRegression(C=2, random_state=42)
)
cls = cls.fit(X_train.to_numpy(), y_train)
preds_train = cls.predict(X_train)
# prediction
preds_test = cls.predict(X_test)
# scores
train_score = accuracy_score(preds_train, y_train), f1_score(preds_train, y_train, average='macro') # on train data
# train_score = (0.9642857142857143, 0.9653621232568601)
test_score = accuracy_score(preds_test, y_test), f1_score(preds_test, y_test, average='macro') # on test data
# test_score = (1.0, 1.0)
# model coefficients
cls[1].coef_, cls[1].intercept_
>>> (array([[-1.13948079, 1.30623841, -2.21496793, -2.05617771],
[ 0.66515676, -0.2541143 , -0.55819748, -0.86441227],
[ 0.47432404, -1.05212411, 2.77316541, 2.92058998]]),
array([-0.35860337, 2.43929019, -2.08068682]))
Now I have the coefficients of the model, and I want to use them to make predictions.
First, I get the predicted class probabilities with predict_proba for the first five observations in the test sample.
preds_test = cls.predict_proba(X_test)
preds_test[0:5]
>>>array([[5.66019001e-03, 9.18455687e-01, 7.58841233e-02],
[9.75854479e-01, 2.41455095e-02, 1.10881450e-08],
[1.18780156e-09, 6.53295166e-04, 9.99346704e-01],
[6.71574900e-03, 8.14174200e-01, 1.79110051e-01],
[6.98756622e-04, 8.09096425e-01, 1.90204818e-01]])
Then I manually calculate the class probabilities for the observations using the model's coefficients.
# define two functions for making predictions
def logit(x, w):
return np.dot(x, w)
# from here: https://stackoverflow.com/questions/34968722/how-to-implement-the-softmax-function-in-python
def softmax(z):
assert len(z.shape) == 2
s = np.max(z, axis=1)
s = s[:, np.newaxis] # necessary step to do broadcasting
e_x = np.exp(z - s)
div = np.sum(e_x, axis=1)
div = div[:, np.newaxis] # dito
return e_x / div
n, k = X_test.shape
X_ = np.hstack((np.ones((n, 1)), X_test)) # add column with 1 for intercept
weights = np.hstack((cls[1].intercept_[:, np.newaxis], cls[1].coef_)) # create weights matrix
results = softmax(logit(X_, weights.T)) # calculate probabilities
results[0:5]
>>>array([[3.67343725e-14, 4.63938438e-06, 9.99995361e-01],
[2.81976786e-05, 8.63083152e-01, 1.36888650e-01],
[1.24572182e-22, 5.47800683e-11, 1.00000000e+00],
[3.32990060e-14, 3.08352323e-06, 9.99996916e-01],
[2.66415118e-15, 1.78252465e-06, 9.99998217e-01]])
If you compare the two results obtained (preds_test[0:5] and results[0:5]), you can see that they do not coincide at all. Please explain to me what I am doing wrong and how I can use the model's coefficients to calculate predictions without using the predict method.
I forgot that a scaler was applied. If you change the code a little, then the results are the same.
scaler = StandardScaler()
scaler.fit(X_train)
X_test_transf = scaler.transform(X_test)
def logit(x, w):
return np.dot(x, w)
def softmax(z):
assert len(z.shape) == 2
s = np.max(z, axis=1)
s = s[:, np.newaxis] # necessary step to do broadcasting
e_x = np.exp(z - s)
div = np.sum(e_x, axis=1)
div = div[:, np.newaxis] # dito
return e_x / div
n, k = X_test_transf.shape
X_ = np.hstack((np.ones((n, 1)), X_test_transf))
weights = np.hstack((cls[1].intercept_[:, np.newaxis], cls[1].coef_))
results = softmax(logit(X_, weights.T))
np.allclose(preds_test, results)
>>>True
predict_proba returns one column per class. In the binary case the first value is the probability of the event not occurring and the second is the probability of the event occurring, so use predict_proba(X)[:,1] to get the probability of the event occurring.
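For the multiclass iris model above there are three columns rather than two; a small sketch (my addition) of how the columns line up with the fitted classes:

proba = cls.predict_proba(X_test)  # shape (n_samples, 3): one column per class
print(cls[1].classes_)             # column order of the classes, e.g. [0. 1. 2.]
print(proba[:5, 1])                # probability of class 1 for the first five rows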

Customized neural network gives 0 accuracy for iris dataset

I've built a neural network for iris classification with the following code:
from sklearn import datasets
from scipy.optimize import minimize
import numpy as np
def train_test_split(X, y):
idx = np.arange(len(X))
train_size = int(len(X) * 0.2)
np.random.shuffle(idx)
X = X[idx]
y = y[idx]
X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]
return X_train, X_test, y_train, y_test
iris = datasets.load_iris()
X = iris.data
y = iris.target
nb_classes = 3
targets = np.array([y]).reshape(-1)
Y = np.eye(nb_classes)[targets]
# randomize = np.arange(len(X))
# np.random.shuffle(randomize)
# X = X[randomize]
# Y = Y[randomize]
X_train, X_test, Y_train, Y_test = train_test_split(X, Y)
X_train, X_val, Y_train, Y_val = train_test_split(X_train, Y_train)
def optimize(X_train, Y_train, X_val=None, Y_val=None, epochs=10, nodes=[], lr=0.15):
hidden_layers = len(nodes) - 1
weights = init_weights(nodes)
for epoch in range(1, epochs+1):
weights = train(X_train, Y_train, lr, weights)
if(epoch % 20 == 0):
print("Epoch {}".format(epoch))
print("Training accuracy:{}".format(acc(X_train, Y_train, weights)))
if X_val.any():
print("Validation Accuracy:{}".format(acc(X_val, Y_val, weights)))
return weights
def init_weights(nodes):
"""Initialize weights with random values in [-1, 1] (including bias)"""
layers, weights = len(nodes), []
for i in range(1, layers):
w = [[np.random.uniform(-1, 1) for k in range(nodes[i-1] + 1)]
for j in range(nodes[i])]
weights.append(np.matrix(w))
return weights
def forward(x, weights, layers):
activations, layer_input = [x], x
for j in range(layers):
activation = sigmoid(np.dot(layer_input, weights[j].T))
activations.append(activation)
layer_input = np.append(1, activation) # Augment with bias
return activations
def back(y, activations, weights, layers):
outputFinal = activations[-1]
error = np.matrix(y - outputFinal) # Error at output
for j in range(layers, 0, -1):
currActivation = activations[j]
if(j > 1):
# Augment previous activation
prevActivation = np.append(1, activations[j-1])
else:
# First hidden layer, prevActivation is input (without bias)
prevActivation = activations[0]
delta = np.multiply(error, sigmoid_gradient(currActivation))
weights[j-1] += lr * np.multiply(delta.T, prevActivation)
w = np.delete(weights[j-1], [0], axis=1) # Remove bias from weights
error = np.dot(delta, w) # Calculate error for current layer
return weights
def train(X, Y, lr, weights):
layers = len(weights)
for i in range(len(X)):
x, y = X[i], Y[i]
x = np.matrix(np.append(1, x)) # Augment feature vector
activations = forward(x, weights, layers)
weights = back(y, activations, weights, layers)
return weights
def sigmoid(x):
return 1 / (1 + np.exp(-x))
def sigmoid_gradient(x):
return np.multiply(x, 1-x)
def predict(item, weights):
layers = len(weights)
item = np.append(1, item) # Augment feature vector
##_Forward Propagation_##
activations = forward(item, weights, layers)
outputFinal = activations[-1].A1
index = FindMaxActivation(outputFinal)
# Initialize prediction vector to zeros
y = [0 for i in range(len(outputFinal))]
y[index] = 1 # Set guessed class to 1
return y # Return prediction vector
def FindMaxActivation(output):
"""Find max activation in output"""
m, index = output[0], 0
for i in range(1, len(output)):
if(output[i] > m):
m, index = output[i], i
return index
def acc(X, Y, weights):
"""Run set through network, find overall accuracy"""
correct = 0
for i in range(len(X)):
# x, y = X[i], list(Y[i])
x, y = X[i], Y[i].tolist()
guess = predict(x, weights)
if(y == guess):
# Guessed correctly
correct += 1
return correct / len(X)
f = len(X[0]) # Number of features
o = len(Y[0]) # Number of outputs / classes
layers = [f, 5, 10, o] # Number of nodes in layers
lr, epochs = 0.15, 100
weights = optimize(X_train, Y_train, X_val, Y_val, epochs=epochs, nodes=layers, lr=lr);
print("Testing Accuracy: {}".format(acc(X_test, Y_test, weights)))
But it gives results with an accuracy of 0:
Epoch 20
Training accuracy:0.0
Validation Accuracy:0.0
Epoch 40
Training accuracy:0.0
Validation Accuracy:0.0
Epoch 60
Training accuracy:0.0
Validation Accuracy:0.0
Epoch 80
Training accuracy:0.0
Validation Accuracy:0.0
Epoch 100
Training accuracy:0.0
Validation Accuracy:0.0
Testing Accuracy: 0.0
However, if I use the CSV version of the dataset downloaded from here:
import pandas as pd
iris = pd.read_csv("./data/Iris.csv")
iris = iris.sample(frac=1).reset_index(drop=True) # Shuffle
X = iris[['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm']]
X = np.array(X)
from sklearn.preprocessing import OneHotEncoder
one_hot_encoder = OneHotEncoder(sparse=False)
Y = iris.Species
Y = one_hot_encoder.fit_transform(np.array(Y).reshape(-1, 1))
Y[:5]
from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.15)
X_train, X_val, Y_train, Y_val = train_test_split(X_train, Y_train, test_size=0.1)
Output:
Epoch 20
Training accuracy:0.9385964912280702
Validation Accuracy:0.9230769230769231
Epoch 40
Training accuracy:0.9912280701754386
Validation Accuracy:0.9230769230769231
Epoch 60
Training accuracy:0.9736842105263158
Validation Accuracy:0.9230769230769231
Epoch 80
Training accuracy:0.9736842105263158
Validation Accuracy:0.9230769230769231
Epoch 100
Training accuracy:0.9824561403508771
Validation Accuracy:0.9230769230769231
Testing Accuracy: 0.9565217391304348
Why this difference?

Calculate recall metric when StratifiedShuffleSplit is used

The following method uses the KNN classifier with StratifiedShuffleSplit since I have an imbalanced dataset:
def KNN(train_x, train_y):
skf = StratifiedShuffleSplit()
scores = []
for train, test in skf.split(train_x, train_y):
clf = KNeighborsClassifier(n_neighbors=2, n_jobs=-1)
clf.fit(train_x.loc[train], train_y.loc[train])
score = clf.score(train_x.loc[test], train_y.loc[test])
scores.append(score)
res = np.asarray(scores).mean()
print(res)
How can I modify the scores to calculate the recall and precision metrics instead of the default accuracy?
Thank you,
You need:
sklearn.metrics.recall_score(y_true, y_pred)
sklearn.metrics.precision_score(y_true, y_pred)
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score
def KNN(train_x, train_y):
skf = StratifiedShuffleSplit()
scores = []
scores2 = []
for train, test in skf.split(train_x, train_y):
clf = KNeighborsClassifier(n_neighbors=2, n_jobs=-1)
clf.fit(train_x.loc[train], train_y.loc[train])
y_pred = clf.predict(train_x.loc[test]) # predict the labels of the test set
y_true = train_y.loc[test] # get the true labels of the test test
score = recall_score(y_true, y_pred) # recall estimation
score2 = precision_score(y_true, y_pred) # precision estimation
scores.append(score)
scores2.append(score2)
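The snippet above stops after collecting the per-split scores; to mirror the original function you would still aggregate and report them after the loop, for example (a small completion using the same variable names, not part of the original answer):

import numpy as np
print("mean recall:   ", np.asarray(scores).mean())
print("mean precision:", np.asarray(scores2).mean())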

Input size (depth of inputs) must be accessible via shape inference, but saw value None error when trying to set tf.expand_dims axis to 0

I am trying to use the 20 Newsgroups data set available in sklearn to train an LSTM to do incremental learning (classification). I used sklearn's TfidfVectorizer to pre-process the data, then turned the resulting sparse matrix into a numpy array before feeding it in. After that, when writing the line below:
outputs, final_state = tf.nn.dynamic_rnn(cell, inputs_, initial_state=initial_state)
It gave an error saying that 'inputs_' should have 3 dimensions, so I used:
inputs_ = tf.expand_dims(inputs_, 0)
To expand the dimension. But when I do that I get the error:
ValueError: Input size (depth of inputs) must be accessible via shape
inference, but saw value None.
The shape of 'inputs_' is:
(1, 134410)
I already went through this post, but it did not help.
I cannot seem to understand how to solve this issue. Any help is much appreciated. Thank you in advance!
Shown below is my complete code:
import os
from collections import Counter
import tensorflow as tf
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.datasets import fetch_20newsgroups
import matplotlib as mplt
from matplotlib import cm
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
from sklearn.metrics import f1_score, recall_score, precision_score
from string import punctuation
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
def pre_process():
newsgroups_data = fetch_20newsgroups(subset='all', remove=('headers', 'footers', 'quotes'))
vectorizer = TfidfVectorizer()
features = vectorizer.fit_transform(newsgroups_data.data)
lb = LabelBinarizer()
labels = np.reshape(newsgroups_data.target, [-1])
labels = lb.fit_transform(labels)
return features, labels
def get_batches(x, y, batch_size=1):
for ii in range(0, len(y), batch_size):
yield x[ii:ii + batch_size], y[ii:ii + batch_size]
def plot_error(errorplot, datapoint, numberOfWrongPreds):
errorplot.set_xdata(np.append(errorplot.get_xdata(), datapoint))
errorplot.set_ydata(np.append(errorplot.get_ydata(), numberOfWrongPreds))
errorplot.autoscale(enable=True, axis='both', tight=None)
plt.draw()
def train_test():
features, labels = pre_process()
#Defining Hyperparameters
epochs = 1
lstm_layers = 1
batch_size = 1
lstm_size = 30
learning_rate = 0.003
print(lstm_size)
print(batch_size)
print(epochs)
#--------------placeholders-------------------------------------
# Create the graph object
graph = tf.Graph()
# Add nodes to the graph
with graph.as_default():
tf.set_random_seed(1)
inputs_ = tf.placeholder(tf.float32, [None,None], name = "inputs")
# labels_ = tf.placeholder(dtype= tf.int32)
labels_ = tf.placeholder(tf.int32, [None,None], name = "labels")
#getting dynamic batch size according to the input tensor size
# dynamic_batch_size = tf.shape(inputs_)[0]
#output_keep_prob is the dropout added to the RNN's outputs, the dropout will have no effect on the calculation of the subsequent states.
keep_prob = tf.placeholder(tf.float32, name = "keep_prob")
# Your basic LSTM cell
lstm = tf.contrib.rnn.BasicLSTMCell(lstm_size)
# Add dropout to the cell
drop = tf.contrib.rnn.DropoutWrapper(lstm, output_keep_prob=keep_prob)
#Stack up multiple LSTM layers, for deep learning
cell = tf.contrib.rnn.MultiRNNCell([drop] * lstm_layers)
# Getting an initial state of all zeros
initial_state = cell.zero_state(batch_size, tf.float32)
inputs_ = tf.expand_dims(inputs_, 0)
outputs, final_state = tf.nn.dynamic_rnn(cell, inputs_, initial_state=initial_state)
#hidden layer
hidden = tf.layers.dense(outputs[:, -1], units=25, activation=tf.nn.relu)
logit = tf.contrib.layers.fully_connected(hidden, 1, activation_fn=None)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logit, labels=labels_))
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)
saver = tf.train.Saver()
# ----------------------------online training-----------------------------------------
with tf.Session(graph=graph) as sess:
tf.set_random_seed(1)
sess.run(tf.global_variables_initializer())
iteration = 1
state = sess.run(initial_state)
wrongPred = 0
errorplot, = plt.plot([], [])
for ii, (x, y) in enumerate(get_batches(features, labels, batch_size), 1):
feed = {inputs_: x.toarray(),
labels_: y,
keep_prob: 0.5,
initial_state: state}
predictions = tf.round(tf.nn.softmax(logit)).eval(feed_dict=feed)
print("----------------------------------------------------------")
print("Iteration: {}".format(iteration))
print("Prediction: ", predictions)
print("Actual: ",y)
pred = np.array(predictions)
print(pred)
print(y)
if not ((pred==y).all()):
wrongPred += 1
if ii % 27 == 0:
plot_error(errorplot,ii,wrongPred)
loss, states, _ = sess.run([cost, final_state, optimizer], feed_dict=feed)
print("Train loss: {:.3f}".format(loss))
iteration += 1
saver.save(sess, "checkpoints/sentiment.ckpt")
errorRate = wrongPred/len(labels)
print("ERROR RATE: ", errorRate )
if __name__ == '__main__':
train_test()
ValueError: Input size (depth of inputs) must be accessible via shape inference, but saw value None.
This error is raised because you don't specify the size or the number of inputs.
I got the script working like this:
inputs_ = tf.placeholder(tf.float32, [1,None], name = "inputs")
inputs_withextradim = tf.expand_dims(inputs_, 2)
outputs, final_state = tf.nn.dynamic_rnn(cell, inputs_withextradim, initial_state=initial_state)
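For context (my reading of the fix, not stated in the original answer): with the (1, None) placeholder, a batch of shape (1, 134410) becomes (1, 134410, 1) after expand_dims on axis 2, so dynamic_rnn sees 134,410 timesteps, each with a known input depth of 1. A quick shape check in plain numpy:

import numpy as np

x = np.zeros((1, 134410), dtype=np.float32)  # one TF-IDF row, as in the question
print(np.expand_dims(x, 2).shape)            # (1, 134410, 1)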
