I'm trying to implement a simple neural network using tensorflow with gpu on Jupyter Notebook, but it fails every time to create a session, I have traced the code many times and reduced the number of iterations to 10, and also reduced the number of input tain and test data,just for testing if it was a problem of computational power. My network have 2 hidden layers; the first contains 3 neurons and the second contains 2.
here is my code:
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import numpy as np
import math
import tensorflow as tf
from tensorflow.python.framework import ops
from preprocessing import load_dataset
def model(X_train, Y_train, X_test, Y_test, learning_rate=0.0001):
"""
Arguments:
X_train -- training set, of shape (input size , number of training examples )
Y_train -- test set, of shape (output size , number of training examples )
X_test -- training set, of shape (input size , number of test examples )
Y_test -- test set, of shape (output size, number of test examples )
learning_rate -- learning rate of the optimization
Returns:
parameters -- parameters learnt by the model.
"""
ops.reset_default_graph()
X, Y = create_placeholders()
parameters = initialize_parameters()
Z3 = forward_propagation(X, parameters)
cost = compute_cost(Z3, Y)
print("X = " + str(X))
print("Y = " + str(Y))
print("Z3 = " + str(Z3))
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate).minimize(cost)
init = tf.global_variables_initializer()
with tf.Session() as sess:
sess.run(init)
for i in range(10):
print(sess.run([optimizer, cost], feed_dict={X: X_train, Y: Y_train}))
parameters = sess.run(parameters)
print("Parameters have been trained!")
correct_prediction = tf.equal(tf.argmax(Z3), tf.argmax(Y))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
print("Train Accuracy:", accuracy.eval({X: X_train, Y: Y_train}))
print("Test Accuracy:", accuracy.eval({X: X_test, Y: Y_test}))
return parameters
X_train, Y_train, X_test, Y_test=load_dataset()
parameters= model(X_train, Y_train, X_test, Y_test)
and here are the functions that I'm calling
def create_placeholders():
X = tf.placeholder(dtype=tf.float32, shape=(28755648, 5), name="X")
Y = tf.placeholder(dtype=tf.float32, shape=(1, 5), name="Y")
return X, Y
def initialize_parameters():
W1 = tf.get_variable("W1", [3, 28755648], initializer=tf.contrib.layers.xavier_initializer(seed=0))
b1 = tf.get_variable("b1", [3, 1], initializer=tf.zeros_initializer())
W2 = tf.get_variable("W2", [2, 3], initializer=tf.contrib.layers.xavier_initializer(seed=0))
b2 = tf.get_variable("b2", [2, 1], initializer=tf.zeros_initializer())
W3 = tf.get_variable("W3", [1, 2], initializer=tf.contrib.layers.xavier_initializer(seed=0))
b3 = tf.get_variable("b3", [1, 1], initializer=tf.zeros_initializer())
parameters = {"W1": W1,
"b1": b1,
"W2": W2,
"b2": b2,
"W3": W3,
"b3": b3}
return parameters
def forward_propagation(X, parameters):
W1 = parameters['W1']
b1 = parameters['b1']
W2 = parameters['W2']
b2 = parameters['b2']
W3 = parameters['W3']
b3 = parameters['b3']
Z1 = tf.add(tf.matmul(W1, X), b1)
A1 = tf.nn.relu(Z1)
Z2 = tf.add(tf.matmul(W2, A1), b2)
A2 = tf.nn.relu(Z2)
Z3 = tf.add(tf.matmul(W3, A2), b3)
return Z3
def compute_cost(Z3, Y):
logits = Z3
labels = Y
cost = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=labels))
return cost
And every time I run the code I got the following error:
---------------------------------------------------------------------------
InternalError Traceback (most recent call last)
<ipython-input-42-9ce12ddb96d9> in <module>()
----> 1 parameters= model(X_train, Y_train, X_test, Y_test)
<ipython-input-38-ab5d84d97720> in model(X_train, Y_train, X_test, Y_test, learning_rate)
42
43 # Start the session to compute the tensorflow graph
---> 44 with tf.Session() as sess:
45
46 # Run the initialization
C:\Users\Chaymae\Anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\client\session.py in __init__(self, target, graph, config)
1480
1481 """
-> 1482 super(Session, self).__init__(target, graph, config=config)
1483 # NOTE(mrry): Create these on first `__enter__` to avoid a reference cycle.
1484 self._default_graph_context_manager = None
C:\Users\Chaymae\Anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\client\session.py in __init__(self, target, graph, config)
620 # pylint: enable=protected-access
621 else:
--> 622 self._session = tf_session.TF_NewDeprecatedSession(opts, status)
623 finally:
624 tf_session.TF_DeleteSessionOptions(opts)
C:\Users\Chaymae\Anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\framework\errors_impl.py in __exit__(self, type_arg, value_arg, traceback_arg)
471 None, None,
472 compat.as_text(c_api.TF_Message(self.status.status)),
--> 473 c_api.TF_GetCode(self.status.status))
474 # Delete the underlying status object from memory otherwise it stays alive
475 # as there is a reference to status from this from the traceback due to
InternalError: Failed to create session.
On the other hand, I have tried to create new session and run a simple tensorflow graph, and it worked well.
If anyone could help in my case I would be pleased
Put tf.reset_default_graph() before creating the tf.Session() should solve the problem.
Related
I have a dataset with the following shapes: (2400, 2) (2400,) (1600, 2) (1600,)
My task is to perform non-linear separable classification by binary logistic regression.
But I get the following error in visualization part:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-19-2754b9327868> in <module>()
4
5 # Plot different regions and color them
----> 6 output = output.reshape(x_vals.shape)
7 plt.imshow(output, interpolation='nearest',
8 extent=(x_min, x_max, y_min, y_max),
ValueError: cannot reshape array of size 2880000 into shape (1200,1200)
How can I reshape array into matrix?
Below is my implementation for the reference:
num_features = 2
learning_rate = 0.0001
training_steps = 4000
batch_size = 32
display_step = 50
x_train, y_train = map(list, zip(*[(x,y) for x,y in zip(x_train, y_train) if y==0 or y==1]))
x_test, y_test = map(list, zip(*[(x,y) for x,y in zip(x_test, y_test) if y==0 or y==1]))
x_train, x_test = np.array(x_train, np.float32), np.array(x_test, np.float32)
y_train, y_test = np.array(y_train, np.int64), np.array(y_test, np.int64)
x_train, x_test = x_train.reshape([-1, num_features]), x_test.reshape([-1, num_features])
x_train, x_test = x_train/255., x_test/255.
train_data = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_data = train_data.repeat().shuffle(5000).batch(batch_size).prefetch(1)
b = tf.Variable(tf.ones((num_features, 2)) * 0.000001, name = "weight")
b0 = tf.Variable(0., name = "bias")
def logistic_regression(x, b, b0):
return 1. / (1. + tf.exp(-tf.matmul(x, b) - b0))
def loglikelihood(p, y_true):
return tf.reduce_sum(tf.one_hot(y_true, 2) * tf.math.log(p), axis=-1)
def accuracy(y_pred, y_true):
correct_prediction = tf.equal(tf.argmax(y_pred, axis=-1), y_true)
return tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
optimizer = tf.optimizers.Adam()
for step, (batch_x, batch_y) in enumerate(train_data.take(training_steps), 1):
with tf.GradientTape() as g:
g.watch([b, b0])
p = logistic_regression(batch_x, b, b0)
ll = loglikelihood(p, batch_y)
ll_sum = tf.reduce_mean(ll)
grad_b, grad_b0 = g.gradient(ll_sum, [b, b0])
optimizer.apply_gradients(zip([grad_b, grad_b0], [b, b0]))
if step % display_step == 0:
p = logistic_regression(batch_x, b, b0)
acc = accuracy(p, batch_y)
p = logistic_regression(x_test, b, b0)
val_acc = accuracy(p, y_test)
print("step: %i, acc: %f, val_acc %f" % (step, acc, val_acc))
def predict(x_test):
return tf.round(logistic_regression(x_test, b, b0))
import numpy as np
x_min, y_min = -12, -12
x_max, y_max = 12, 12
x_vals, y_vals = np.meshgrid(np.arange(x_min, x_max, 0.02), np.arange(y_min, y_max, 0.02))
xy_grid = pd.DataFrame(zip(x_vals.ravel(), y_vals.ravel()), dtype=np.float32)
# Predict output labels for all the points on the grid
output = predict(xy_grid.to_numpy()).numpy()
import matplotlib.pyplot as plt
fig, ax = plt.subplots(1, 1)
# Plot different regions and color them
output = output.reshape(x_vals.shape)
plt.imshow(output, interpolation='nearest',
extent=(x_min, x_max, y_min, y_max),
cmap=plt.cm.Paired,
aspect='auto',
origin='lower')
pd.DataFrame(np.concatenate([x_train,
np.expand_dims(y_train, axis=-1)], axis=1)).plot.scatter(0, 1, c=2, colormap='viridis', ax=ax)
The expected result should be like this:
expected image
But I get the following image:
resulting image
The error is caused by x_vals.shape being too small. Hence numpy cannot reshape the data as you would be losing some data.
You need to change the shape of x_vals to be whatever shape output you expect you see.
For example, if you want a 1200x2400 image you can do:
x_min, y_min = -12, -12
x_max, y_max = 12, 12
x_vals, y_vals = np.meshgrid(np.arange(x_min, x_max, 0.01), np.arange(y_min, y_max, 0.02))
print(x_vals.shape) # shows x_vals.shape = (1200, 2400) which is 28880000 elements
output = np.arange(2880000)
output = output.reshape(x_vals.shape) # this will now work
I am using TensorFlow 2.0 with Python 3.7.5 to build a neural network for Iris classification using Model sub-classing approach.
The code I have is as follows:
import tensorflow as tf
from tensorflow.keras import Sequential, Model
from tensorflow.keras.layers import Dense, Input
import pandas as pd
import numpy as np
# Read in data-
data = pd.read_csv("iris.csv")
# Get data types for different attributes-
data.dtypes
'''
sepallength float64
sepalwidth float64
petallength float64
petalwidth float64
class object
dtype: object
'''
# Get shape of data-
data.shape
# (150, 5)
# Check for missing values-
data.isnull().values.any()
# False
# Perform label encoding for target variable-
# Initialize a label encoder-
le = LabelEncoder()
# Label encode target attribute-
data['class'] = le.fit_transform(data['class'])
# Get different classes which are label encoded-
le.classes_
# array(['Iris-setosa', 'Iris-versicolor', 'Iris-virginica'], dtype=object)
# Split data into features (X) and target (y)-
X = data.drop('class', axis = 1)
y = data['class']
# Get training & testing sets using features and labels-
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)
# Convert from Pandas to numpy arrays-
X_train = X_train.to_numpy()
X_test = X_test.to_numpy()
y_train = y_train.to_numpy()
y_test = y_test.to_numpy()
print("\nTraining and Testing set dimensions:")
print("X_train.shape = {0}, y_train.shape = {1}".format(X_train.shape, y_train.shape))
print("X_test.shape = {0}, y_test.shape = {1}\n".format(X_test.shape, y_test.shape))
# Training and Testing set dimensions:
# X_train.shape = (105, 4), y_train.shape = (105,)
# X_test.shape = (45, 4), y_test.shape = (45,)
class IrisClassifier(Model):
def __init__(self):
super(IrisClassifier, self).__init__()
'''
self.layer1 = Dense(
units = 4, activation = 'relu',
kernel_initializer = tf.keras.initializers.GlorotNormal()
)
'''
self.input_layer = Input(
shape = (4,)
)
self.layer1 = Dense(
units = 10, activation = 'relu',
input_dim = 4,
kernel_initializer = tf.keras.initializers.GlorotNormal()
)
self.layer2 = Dense(
units = 10, activation = 'relu',
kernel_initializer = tf.keras.initializers.GlorotNormal()
)
self.outputlayer = Dense(
units = 3, activation = 'softmax'
)
def call(self, x):
x = self.input_layer(x)
x = self.layer1(x)
x = self.layer2(x)
# x = self.layer3(x)
return self.outputlayer(x)
# Instantiate a model of defined neural network class-
model = IrisClassifier()
# Define EarlyStopping callback-
callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)
# Compile defined model-
model.compile(
optimizer=tf.keras.optimizers.Adam(lr = 0.001),
loss = 'sparse_categorical_crossentropy',
metrics = ['accuracy']
)
# Train model-
history2 = model.fit(
x = X_train, y = y_train,
validation_data = [X_test, y_test],
epochs = 50, batch_size = 16,
callbacks = [callback]
)
When I execute 'history2' code, I get the following error:
--------------------------------------------------------------------------- ValueError Traceback (most recent call
last) in
3 validation_data = [X_test, y_test],
4 epochs = 50, batch_size = 16,
----> 5 callbacks = [callback]
6 )
~/.local/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training.py
in fit(self, x, y, batch_size, epochs, verbose, callbacks,
validation_split, validation_data, shuffle, class_weight,
sample_weight, initial_epoch, steps_per_epoch, validation_steps,
validation_freq, max_queue_size, workers, use_multiprocessing,
**kwargs)
726 max_queue_size=max_queue_size,
727 workers=workers,
--> 728 use_multiprocessing=use_multiprocessing)
729
730 def evaluate(self,
~/.local/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_arrays.py
in fit(self, model, x, y, batch_size, epochs, verbose, callbacks,
validation_split, validation_data, shuffle, class_weight,
sample_weight, initial_epoch, steps_per_epoch, validation_steps,
validation_freq, **kwargs)
640 steps=steps_per_epoch,
641 validation_split=validation_split,
--> 642 shuffle=shuffle)
643
644 if validation_data:
~/.local/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training.py
in _standardize_user_data(self, x, y, sample_weight, class_weight,
batch_size, check_steps, steps_name, steps, validation_split, shuffle,
extract_tensors_from_dataset) 2417 # First, we build the model
on the fly if necessary. 2418 if not self.inputs:
-> 2419 all_inputs, y_input, dict_inputs = self._build_model_with_inputs(x, y) 2420 is_build_called =
True 2421 else:
~/.local/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training.py
in _build_model_with_inputs(self, inputs, targets) 2580 # or
lists of arrays, and extract a flat list of inputs from the passed
2581 # structure.
-> 2582 training_utils.validate_input_types(inputs, orig_inputs) 2583 2584 if isinstance(inputs, (list, tuple)):
~/.local/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_utils.py
in validate_input_types(inp, orig_inp, allow_dict, field_name) 1149
raise ValueError( 1150 'Please provide as model inputs
either a single array or a list of '
-> 1151 'arrays. You passed: {}={}'.format(field_name, orig_inp)) 1152 1153
ValueError: Please provide as model inputs either a single array or a
list of arrays. You passed: inputs= sepallength sepalwidth
petallength petalwidth 117 7.7 3.8 6.7
2.2 7 5.0 3.4 1.5 0.2 73 6.1 2.8 4.7 1.2 92 5.8 2.6 4.0 1.2 87 6.3 2.3 4.4 1.3 .. ... ... ... ... 93 5.0
2.3 3.3 1.0 30 4.8 3.1 1.6 0.2 25 5.0 3.0 1.6 0.2 31 5.4 3.4 1.5 0.4 97 6.2 2.9 4.3 1.3
[105 rows x 4 columns]
After converting X_train, y_train, X_test and y_test to numpy arrays, when I execute, history2 to train the model, I get the following error:
TypeError: in converted code:
<ipython-input-14-ae6111e00410>:34 call *
x = self.input_layer(x)
/home/arjun/.local/lib/python3.7/site-packages/tensorflow_core/python/autograph/impl/api.py:427
converted_call
f in m.dict.values() for m in (collections, pdb, copy, inspect, re)):
/home/arjun/.local/lib/python3.7/site-packages/tensorflow_core/python/autograph/impl/api.py:427
f in m.dict.values() for m in (collections, pdb, copy, inspect, re)):
/home/arjun/.local/lib/python3.7/site-packages/tensorflow_core/python/ops/math_ops.py:1336
tensor_equals
return gen_math_ops.equal(self, other)
/home/arjun/.local/lib/python3.7/site-packages/tensorflow_core/python/ops/gen_math_ops.py:3627
equal
name=name)
/home/arjun/.local/lib/python3.7/site-packages/tensorflow_core/python/framework/op_def_library.py:536
_apply_op_helper
repr(values), type(values).name, err))
TypeError: Expected float32 passed to parameter 'y' of op 'Equal', got 'collections' of type 'str' instead. Error: Expected float32, got
'collections' of type 'str' instead.
What's going wrong?
Thanks!
Your problem stems from the way you preprocess your data, before you fit it to your model.
It is highly likely that you pass the entire csv-dataset from iris, including your column headers, hence your issue. You can verify this from
"You passed: inputs= sepallength sepalwidth petallength petalwidth 117
7.7 3.8 6.7".
Ensure that your Xs and ys do not contain the column names, but only the values. Use X_train = X_train.to_numpy() to ensure the conversion works. In older versions, you could also use X_train.values, but the latter has been deprecated.
I solved the problem. According to Francois Chollet:
A subclassed model is a piece of Python code (a call method). There is
no graph of layers here. We cannot know how layers are connected to
each other (because that's defined in the body of call, not as an
explicit data structure), so we cannot infer input / output shapes
Therefore, the following code runs fine (where you don't specify the input training data shape):
# Define EarlyStopping callback-
callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)
class IrisClassifier(Model):
def __init__(self):
super(IrisClassifier, self).__init__()
self.layer1 = Dense(
units = 10, activation = 'relu',
# input_dim = 4,
kernel_initializer = tf.keras.initializers.GlorotNormal()
)
self.layer2 = Dense(
units = 10, activation = 'relu',
kernel_initializer = tf.keras.initializers.GlorotNormal()
)
self.outputlayer = Dense(
units = 3, activation = 'softmax'
)
def call(self, x):
# x = self.input_layer(x)
x = self.layer1(x)
x = self.layer2(x)
# x = self.layer3(x)
return self.outputlayer(x)
# Instantiate a model of defined neural network class-
model2 = IrisClassifier()
# Compile defined model-
model2.compile(
optimizer=tf.keras.optimizers.Adam(lr = 0.001),
loss = 'sparse_categorical_crossentropy',
metrics = ['accuracy']
)
# Train model-
history2 = model2.fit(
x = X_train, y = y_train,
validation_data = [X_test, y_test],
epochs = 50, batch_size = 16,
callbacks = [callback]
)
Thanks!
I am working within a virtual environment that was setup following https://docs.python.org/3/tutorial/venv.html
In addition I am using Jupyter Notebook.
In my code I am using sklearn.model_selection.cross_val_score(...). It seems that the parameter n_jobs = "1" or "-1" is causing issues such that using "1" I receive no errors. While using "-1" gives me the following error:
---------------------------------------------------------------------------
_RemoteTraceback Traceback (most recent call last)
_RemoteTraceback:
'''
Traceback (most recent call last):
File "c:\users\chang\ml\lib\site-packages\sklearn\externals\joblib\externals\loky\process_executor.py", line 391, in _process_worker
call_item = call_queue.get(block=True, timeout=timeout)
File "C:\Users\chang\AppData\Local\Programs\Python\Python37-32\lib\multiprocessing\queues.py", line 99, in get
if not self._rlock.acquire(block, timeout):
PermissionError: [WinError 5] Access is denied
'''
The above exception was the direct cause of the following exception:
BrokenProcessPool Traceback (most recent call last)
<ipython-input-10-56afe11b41fd> in <module>
11 X_poly = poly.fit_transform(X)
12
---> 13 score = cross_val_score(lgr_clf, X_poly, y, cv=loo, scoring='accuracy', n_jobs=-1).mean()
14 scores.append(score)
15
c:\users\chang\ml\lib\site-packages\sklearn\model_selection\_validation.py in cross_val_score(estimator, X, y, groups, scoring, cv, n_jobs, verbose, fit_params, pre_dispatch, error_score)
400 fit_params=fit_params,
401 pre_dispatch=pre_dispatch,
--> 402 error_score=error_score)
403 return cv_results['test_score']
404
c:\users\chang\ml\lib\site-packages\sklearn\model_selection\_validation.py in cross_validate(estimator, X, y, groups, scoring, cv, n_jobs, verbose, fit_params, pre_dispatch, return_train_score, return_estimator, error_score)
238 return_times=True, return_estimator=return_estimator,
239 error_score=error_score)
--> 240 for train, test in cv.split(X, y, groups))
241
242 zipped_scores = list(zip(*scores))
c:\users\chang\ml\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self, iterable)
928
929 with self._backend.retrieval_context():
--> 930 self.retrieve()
931 # Make sure that we get a last message telling us we are done
932 elapsed_time = time.time() - self._start_time
c:\users\chang\ml\lib\site-packages\sklearn\externals\joblib\parallel.py in retrieve(self)
831 try:
832 if getattr(self._backend, 'supports_timeout', False):
--> 833 self._output.extend(job.get(timeout=self.timeout))
834 else:
835 self._output.extend(job.get())
c:\users\chang\ml\lib\site-packages\sklearn\externals\joblib\_parallel_backends.py in wrap_future_result(future, timeout)
519 AsyncResults.get from multiprocessing."""
520 try:
--> 521 return future.result(timeout=timeout)
522 except LokyTimeoutError:
523 raise TimeoutError()
~\AppData\Local\Programs\Python\Python37-32\lib\concurrent\futures\_base.py in result(self, timeout)
430 raise CancelledError()
431 elif self._state == FINISHED:
--> 432 return self.__get_result()
433 else:
434 raise TimeoutError()
~\AppData\Local\Programs\Python\Python37-32\lib\concurrent\futures\_base.py in __get_result(self)
382 def __get_result(self):
383 if self._exception:
--> 384 raise self._exception
385 else:
386 return self._result
BrokenProcessPool: A task has failed to un-serialize. Please ensure that the arguments of the function are all picklable.
------------------------------------------------------------------------------
I have a second computer where the code is working, but there is no virtual environment setup.
Running the cmd as administrator does not fix my problem.
I do not have my virtual environment as a environment variable, but I do have C:\Users\chang\AppData\Local\Programs\Python\Python37-32 as an environment variable.
I suspect that I am missing a crucial step while setting up my virtual environment that leads to PermissionError: [WinError 5] Access is denied error.
#!/usr/bin/env python
# coding: utf-8
# In[14]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sklearn.linear_model as skl_lm
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split, LeaveOneOut, KFold, cross_val_score
from sklearn.preprocessing import PolynomialFeatures
import matplotlib as mpl
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, classification_report, precision_score
from sklearn import preprocessing
from sklearn import neighbors
import statsmodels.api as sm
import statsmodels.formula.api as smf
get_ipython().run_line_magic('matplotlib', 'inline')
plt.style.use('seaborn-white')
# In[15]:
df = pd.read_csv('Default.csv', index_col = 0)
df.info()
# In[16]:
##ESTIAMATE TEST ERROR. 3 SPLITS
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics
cols = ['student', 'balance', 'income']
X=df[cols]
y=df['default']
X=X.replace("Yes",1)
X=X.replace("No",0)
y=y.replace("Yes",1)
y=y.replace("No",0)
t_prop = 0.5
poly_order = np.arange(1,4) #degrees
r_state = np.arange(3) #number of splits
Z = np.zeros((poly_order.size,r_state.size))
X1, Y1 = np.meshgrid(poly_order, r_state, indexing='ij')
for (i,j),v in np.ndenumerate(Z):
poly = PolynomialFeatures(int(X1[i,j]))
X_poly = poly.fit_transform(X)
X_train, X_test, y_train, y_test = train_test_split(X_poly, y, test_size=0.3,)# random_state=42)
y_train_default = (y_train == 1)
y_test_default = (y_test == 1)
lgr_clf = LogisticRegression(solver = "lbfgs")
lgr_clf.fit(X_train, y_train_default)
y_train_pred = lgr_clf.predict(X_train)
y_test_pred = lgr_clf.predict(X_test)
Z[i,j]= metrics.accuracy_score(y_test, y_test_pred)
plt.plot(X1,Z)
plt.title('{} random splits of the data set'.format(max(r_state)+1))
plt.ylabel('Accuracy Score')
plt.ylim(.94,1)
plt.xlabel('Degree of Polynomial')
plt.xlim(1,3)
# In[17]:
##LOOCV
loo = LeaveOneOut()
loo.get_n_splits(df)
scores = list()
X = X[:2500]
y = y[:2500]
for i in poly_order:
poly = PolynomialFeatures(i)
X_poly = poly.fit_transform(X)
score = cross_val_score(lgr_clf, X_poly, y, cv=loo, scoring='accuracy', n_jobs = -1).mean()
scores.append(score)
# k-fold CV
folds = 3
elements = len(df.index)
X1, Y1 = np.meshgrid(poly_order, r_state, indexing='ij')
Z3 = np.zeros((poly_order.size,r_state.size))
for (i,j),v in np.ndenumerate(Z3):
poly = PolynomialFeatures(X1[i,j])
X_poly = poly.fit_transform(X)
kf_10 = KFold(n_splits=folds, random_state=Y1[i,j])
Z3[i,j] = cross_val_score(lgr_clf, X_poly, y, cv=kf_10, scoring='accuracy').mean()
fig, (ax1, ax2) = plt.subplots(1,2, figsize=(10,4))
# Note: cross_val_score() method return negative values for the scores.
# https://github.com/scikit-learn/scikit-learn/issues/2439
# Left plot
ax1.plot(poly_order, np.array(scores), '-o')
ax1.set_title('LOOCV')
# Right plot
ax2.plot(X1,Z3,'-o')
ax2.set_title('3-fold CV')
for ax in fig.axes:
ax.set_ylabel('Mean Squared Error')
ax.set_xlabel('Degree of Polynomial')
ax.set_ylim(0.9,1)
ax.set_xlim(0.5,3.5)
#ax.set_xticks(range(1,5,2));
# In[18]:
##ESTIAMATE TEST ERROR. 4 SPLITS
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics
cols = ['student', 'balance', 'income']
X=df[cols]
y=df['default']
X=X.replace("Yes",1)
X=X.replace("No",0)
y=y.replace("Yes",1)
y=y.replace("No",0)
t_prop = 0.5
poly_order = np.arange(1,4) #degrees
r_state = np.arange(4) #number of splits
Z = np.zeros((poly_order.size,r_state.size))
X1, Y1 = np.meshgrid(poly_order, r_state, indexing='ij')
for (i,j),v in np.ndenumerate(Z):
poly = PolynomialFeatures(int(X1[i,j]))
X_poly = poly.fit_transform(X)
X_train, X_test, y_train, y_test = train_test_split(X_poly, y, test_size=0.3,)# random_state=42)
y_train_default = (y_train == 1)
y_test_default = (y_test == 1)
lgr_clf = LogisticRegression(solver = "lbfgs")
lgr_clf.fit(X_train, y_train_default)
y_train_pred = lgr_clf.predict(X_train)
y_test_pred = lgr_clf.predict(X_test)
Z[i,j]= metrics.accuracy_score(y_test, y_test_pred)
plt.plot(X1,Z)
plt.title('{} random splits of the data set'.format(max(r_state)+1))
plt.ylabel('Accuracy Score')
plt.ylim(.94,1)
plt.xlabel('Degree of Polynomial')
plt.xlim(1,3)
# In[19]:
##LOOCV
loo = LeaveOneOut()
loo.get_n_splits(df)
scores = list()
X = X[:2500]
y = y[:2500]
for i in poly_order:
poly = PolynomialFeatures(i)
X_poly = poly.fit_transform(X)
score = cross_val_score(lgr_clf, X_poly, y, cv=loo, scoring='accuracy', n_jobs = -1).mean()
scores.append(score)
# k-fold CV
folds = 4
elements = len(df.index)
X1, Y1 = np.meshgrid(poly_order, r_state, indexing='ij')
Z4 = np.zeros((poly_order.size,r_state.size))
for (i,j),v in np.ndenumerate(Z4):
poly = PolynomialFeatures(X1[i,j])
X_poly = poly.fit_transform(X)
kf_10 = KFold(n_splits=folds, random_state=Y1[i,j])
Z4[i,j] = cross_val_score(lgr_clf, X_poly, y, cv=kf_10, scoring='accuracy').mean()
fig, (ax1, ax2) = plt.subplots(1,2, figsize=(10,4))
# Note: cross_val_score() method return negative values for the scores.
# https://github.com/scikit-learn/scikit-learn/issues/2439
# Left plot
ax1.plot(poly_order, np.array(scores), '-o')
ax1.set_title('LOOCV')
# Right plot
ax2.plot(X1,Z4,'-o')
ax2.set_title('4-fold CV')
for ax in fig.axes:
ax.set_ylabel('Mean Squared Error')
ax.set_xlabel('Degree of Polynomial')
ax.set_ylim(0.9,1)
ax.set_xlim(0.5,3.5)
#ax.set_xticks(range(1,5,2));
# In[21]:
##ESTIAMATE TEST ERROR. 5 SPLITS
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics
cols = ['student', 'balance', 'income']
X=df[cols]
y=df['default']
X=X.replace("Yes",1)
X=X.replace("No",0)
y=y.replace("Yes",1)
y=y.replace("No",0)
t_prop = 0.5
poly_order = np.arange(1,4) #degrees
r_state = np.arange(5) #number of splits
Z = np.zeros((poly_order.size,r_state.size))
X1, Y1 = np.meshgrid(poly_order, r_state, indexing='ij')
for (i,j),v in np.ndenumerate(Z):
poly = PolynomialFeatures(int(X1[i,j]))
X_poly = poly.fit_transform(X)
X_train, X_test, y_train, y_test = train_test_split(X_poly, y, test_size=0.3,)# random_state=42)
y_train_default = (y_train == 1)
y_test_default = (y_test == 1)
lgr_clf = LogisticRegression(solver = "lbfgs")
lgr_clf.fit(X_train, y_train_default)
y_train_pred = lgr_clf.predict(X_train)
y_test_pred = lgr_clf.predict(X_test)
Z[i,j]= metrics.accuracy_score(y_test, y_test_pred)
plt.plot(X1,Z)
plt.title('{} random splits of the data set'.format(max(r_state)+1))
plt.ylabel('Accuracy Score')
plt.ylim(.94,1)
plt.xlabel('Degree of Polynomial')
plt.xlim(1,3)
# In[22]:
##LOOCV
loo = LeaveOneOut()
loo.get_n_splits(df)
scores = list()
X = X[:2500]
y = y[:2500]
for i in poly_order:
poly = PolynomialFeatures(i)
X_poly = poly.fit_transform(X)
score = cross_val_score(lgr_clf, X_poly, y, cv=loo, scoring='accuracy', n_jobs = -1).mean()
scores.append(score)
# k-fold CV
folds = 5
elements = len(df.index)
X1, Y1 = np.meshgrid(poly_order, r_state, indexing='ij')
Z5 = np.zeros((poly_order.size,r_state.size))
for (i,j),v in np.ndenumerate(Z5):
poly = PolynomialFeatures(X1[i,j])
X_poly = poly.fit_transform(X)
kf_10 = KFold(n_splits=folds, random_state=Y1[i,j])
Z5[i,j] = cross_val_score(lgr_clf, X_poly, y, cv=kf_10, scoring='accuracy').mean()
fig, (ax1, ax2) = plt.subplots(1,2, figsize=(10,4))
# Note: cross_val_score() method return negative values for the scores.
# https://github.com/scikit-learn/scikit-learn/issues/2439
# Left plot
ax1.plot(poly_order, np.array(scores), '-o')
ax1.set_title('LOOCV')
# Right plot
ax2.plot(X1,Z5,'-o')
ax2.set_title('5-fold CV')
for ax in fig.axes:
ax.set_ylabel('Mean Squared Error')
ax.set_xlabel('Degree of Polynomial')
ax.set_ylim(0.9,1)
ax.set_xlim(0.5,3.5)
#ax.set_xticks(range(1,5,2));
# In[23]:
#Analysis
#When Comparing the LOOCV to the random split, it can be seen that the
#LOOCV is closest to a linear model with polynomial degree one.
#This is also a true statement when compared to the K-fold CV.
#In addition the number of folds does not cause a huge deviation
#compared to LOOCV. This proves the statement in class that having
#a large or small number of folds does not necessarily make the model better
# In[ ]:
Additional Information/Updates:
2/3/2020
If anybody comes across this, here is a more active thread Github
Thread. Here someone has mentioned a new possible fix, but unsure
yet if it fixes the problem mentioned here. It is related to how the data
is read in, but I doubt that this is the solution.
Small update, I have yet to revisit this problem, but I recently
encountered a similar run time error for a different program (unsure
if it was the exact same run time error. Also unsure if it is correct
to even call this a run time error in the first place). I realized my python
was 32bit for some unknown reason. At this point I upgraded to 64bit
which fixed my problem. I have yet to try this on my old code posted
here. Unsure, but I also need to check if the python was 32bit on my
other machine.
I have an error when trying to fit a linear binary classifier using step function and MSE, instead of softmax and cross-entropy loss. I have and error which I can't overcome probably due to shape inconsistencies. I provide a code sample. Please help
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification as gen_data
from sklearn.model_selection import train_test_split
rng = np.random
# Setting hyperparameters
n_observations = 100
lr = 0.005
n_iter = 100
# Generate input data
xs, ys = gen_data(n_features=2, n_redundant=0, n_informative=2,
random_state=0, n_clusters_per_class=1)
# Split data into train and test
X_train, X_test, y_train, y_test = train_test_split(xs, ys, test_size=.4)
X_train = np.float32(X_train)
X_test = np.float32(X_test)
# Graph
X = tf.placeholder(tf.float32)
Y = tf.placeholder(tf.float32)
W = tf.Variable(np.float32(rng.randn(2)), name="weight")
b = tf.Variable(np.float32(rng.randn()), name="bias")
def step(x):
is_greater = tf.greater(x, 0)
as_float = tf.to_float(is_greater)
doubled = tf.multiply(as_float, 2)
return tf.subtract(doubled, 1)
Y_pred = step(tf.add(tf.multiply(X , W), b))
cost = tf.reduce_mean(tf.squared_difference(Y_pred, Y))
# Using built-in optimization algorithm to train the model:
train_step = tf.train.GradientDescentOptimizer(0.005).minimize(cost)
sess = tf.Session()
sess.run(tf.initialize_all_variables())
for step in range(n_iter):
sess.run(train_step, feed_dict={X:X_train, Y:y_train})
print ("iter: {0}; weight: {1}; bias: {2}".format(step,
sess.run(W),
sess.run(b)))
This is the error:
ValueErrorTraceback (most recent call last)
<ipython-input-17-5a0c4711802c> in <module>()
26
27 # Using built-in optimization algorithm to train the model:
---> 28 train_step = tf.train.GradientDescentOptimizer(0.005).minimize(cost)
29
30 # Using TF differentiation from scratch to implement a step-by-step optimizer
/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/optimizer.pyc in minimize(self, loss, global_step, var_list, gate_gradients, aggregation_method, colocate_gradients_with_ops, name, grad_loss)
405 "No gradients provided for any variable, check your graph for ops"
406 " that do not support gradients, between variables %s and loss %s." %
--> 407 ([str(v) for _, v in grads_and_vars], loss))
408
409 return self.apply_gradients(grads_and_vars, global_step=global_step,
ValueError: No gradients provided for any variable, check your graph for ops that do not support gradients, between variables ["<tf.Variable 'weight:0' shape=(2,) dtype=float64_ref>", "<tf.Variable 'bias:0' shape=() dtype=float32_ref>", "<tf.Variable 'weight_1:0' shape=(2,) dtype=float64_ref>", "<tf.Variable 'bias_1:0' shape=() dtype=float32_ref>",
Your training data isn't changing between training steps. That is, each training step feeds the same values for X and Y:
for step in range(n_iter):
sess.run(train_step, feed_dict={X:X_train, Y:y_train})
If you set different values for X and Y between training steps, the error should go away.
I don't understand why my code wouldn't run. I started with the TensorFlow tutorial to classify the images in the mnist data set using a single layer feedforward neural net. Then modified the code to create a multilayer perceptron that maps out 37 inputs to 1 output. The input and output training data are being loaded from Matlab data file (.mat)
Here is my code..
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from scipy.io import loadmat
%matplotlib inline
import tensorflow as tf
from tensorflow.contrib import learn
import sklearn
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from warnings import filterwarnings
filterwarnings('ignore')
sns.set_style('white')
from sklearn import datasets
from sklearn.preprocessing import scale
from sklearn.cross_validation import train_test_split
from sklearn.datasets import make_moons
X = np.array(loadmat("Data/DataIn.mat")['TrainingDataIn'])
Y = np.array(loadmat("Data/DataOut.mat")['TrainingDataOut'])
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=.5)
total_len = X_train.shape[0]
# Parameters
learning_rate = 0.001
training_epochs = 500
batch_size = 10
display_step = 1
dropout_rate = 0.9
# Network Parameters
n_hidden_1 = 19 # 1st layer number of features
n_hidden_2 = 26 # 2nd layer number of features
n_input = X_train.shape[1]
n_classes = 1
# tf Graph input
X = tf.placeholder("float32", [None, 37])
Y = tf.placeholder("float32", [None])
def multilayer_perceptron(X, weights, biases):
# Hidden layer with RELU activation
layer_1 = tf.add(tf.matmul(X, weights['h1']), biases['b1'])
layer_1 = tf.nn.relu(layer_1)
layer_2 = tf.add(tf.matmul(layer_1, weights['h2']), biases['b2'])
layer_2 = tf.nn.relu(layer_2)
# Output layer with linear activation
out_layer = tf.matmul(layer_2, weights['out']) + biases['out']
return out_layer
# Store layers weight & bias
weights = {
'h1': tf.Variable(tf.random_normal([n_input, n_hidden_1], 0, 0.1)),
'h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2], 0, 0.1)),
'out': tf.Variable(tf.random_normal([n_hidden_2, n_classes], 0, 0.1))
}
biases = {
'b1': tf.Variable(tf.random_normal([n_hidden_1], 0, 0.1)),
'b2': tf.Variable(tf.random_normal([n_hidden_2], 0, 0.1)),
'out': tf.Variable(tf.random_normal([n_classes], 0, 0.1))
}
# Construct model
pred = multilayer_perceptron(X, weights, biases)
tf.shape(pred)
tf.shape(Y)
print("Prediction matrix:", pred)
print("Output matrix:", Y)
# Define loss and optimizer
cost = tf.reduce_mean(tf.square(pred-Y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
# Launch the graph
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
# Training cycle
for epoch in range(training_epochs):
avg_cost = 0.
total_batch = int(total_len/batch_size)
print(total_batch)
# Loop over all batches
for i in range(total_batch-1):
batch_x = X_train[i*batch_size:(i+1)*batch_size]
batch_y = Y_train[i*batch_size:(i+1)*batch_size]
# Run optimization op (backprop) and cost op (to get loss value)
_, c, p = sess.run([optimizer, cost, pred], feed_dict={X: batch_x,
Y: batch_y})
# Compute average loss
avg_cost += c / total_batch
# sample prediction
label_value = batch_y
estimate = p
err = label_value-estimate
print ("num batch:", total_batch)
# Display logs per epoch step
if epoch % display_step == 0:
print ("Epoch:", '%04d' % (epoch+1), "cost=", \
"{:.9f}".format(avg_cost))
print ("[*]----------------------------")
for i in xrange(5):
print ("label value:", label_value[i], \
"estimated value:", estimate[i])
print ("[*]============================")
print ("Optimization Finished!")
# Test model
correct_prediction = tf.equal(tf.argmax(pred), tf.argmax(Y))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
print ("Accuracy:", accuracy.eval({X: X_test, Y: Y_test}))
when I run the code I get error messages:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-4-6b8af9192775> in <module>()
93 # Run optimization op (backprop) and cost op (to get loss value)
94 _, c, p = sess.run([optimizer, cost, pred], feed_dict={X: batch_x,
---> 95 Y: batch_y})
96 # Compute average loss
97 avg_cost += c / total_batch
~\AppData\Local\Continuum\Anaconda3\envs\ann\lib\site-packages\tensorflow\python\client\session.py in run(self, fetches, feed_dict, options, run_metadata)
787 try:
788 result = self._run(None, fetches, feed_dict, options_ptr,
--> 789 run_metadata_ptr)
790 if run_metadata:
791 proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)
~\AppData\Local\Continuum\Anaconda3\envs\ann\lib\site-packages\tensorflow\python\client\session.py in _run(self, handle, fetches, feed_dict, options, run_metadata)
973 'Cannot feed value of shape %r for Tensor %r, '
974 'which has shape %r'
--> 975 % (np_val.shape, subfeed_t.name, str(subfeed_t.get_shape())))
976 if not self.graph.is_feedable(subfeed_t):
977 raise ValueError('Tensor %s may not be fed.' % subfeed_t)
ValueError: Cannot feed value of shape (10, 1) for Tensor 'Placeholder_7:0', which has shape '(?,)'
I've encountered this problem before. The difference is that a Tensor of shape (10, 1) looks like [[1], [2], [3]], while a Tensor of shape (10,) looks like [1, 2, 3].
You should be able to fix it by changing the line
Y = tf.placeholder("float32", [None])
to:
Y = tf.placeholder("float32", [None, 1])