How to fix "PermissionError: [WinError 5] Access is denied" in virtual environment and Jupyter notebook caused by n_jobs = -1 - python-3.x

I am working within a virtual environment that was set up following https://docs.python.org/3/tutorial/venv.html
In addition I am using Jupyter Notebook.
In my code I am using sklearn.model_selection.cross_val_score(...). The n_jobs parameter seems to be the cause: with n_jobs=1 I receive no errors, while n_jobs=-1 gives me the following error:
---------------------------------------------------------------------------
_RemoteTraceback Traceback (most recent call last)
_RemoteTraceback:
'''
Traceback (most recent call last):
File "c:\users\chang\ml\lib\site-packages\sklearn\externals\joblib\externals\loky\process_executor.py", line 391, in _process_worker
call_item = call_queue.get(block=True, timeout=timeout)
File "C:\Users\chang\AppData\Local\Programs\Python\Python37-32\lib\multiprocessing\queues.py", line 99, in get
if not self._rlock.acquire(block, timeout):
PermissionError: [WinError 5] Access is denied
'''
The above exception was the direct cause of the following exception:
BrokenProcessPool Traceback (most recent call last)
<ipython-input-10-56afe11b41fd> in <module>
11 X_poly = poly.fit_transform(X)
12
---> 13 score = cross_val_score(lgr_clf, X_poly, y, cv=loo, scoring='accuracy', n_jobs=-1).mean()
14 scores.append(score)
15
c:\users\chang\ml\lib\site-packages\sklearn\model_selection\_validation.py in cross_val_score(estimator, X, y, groups, scoring, cv, n_jobs, verbose, fit_params, pre_dispatch, error_score)
400 fit_params=fit_params,
401 pre_dispatch=pre_dispatch,
--> 402 error_score=error_score)
403 return cv_results['test_score']
404
c:\users\chang\ml\lib\site-packages\sklearn\model_selection\_validation.py in cross_validate(estimator, X, y, groups, scoring, cv, n_jobs, verbose, fit_params, pre_dispatch, return_train_score, return_estimator, error_score)
238 return_times=True, return_estimator=return_estimator,
239 error_score=error_score)
--> 240 for train, test in cv.split(X, y, groups))
241
242 zipped_scores = list(zip(*scores))
c:\users\chang\ml\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self, iterable)
928
929 with self._backend.retrieval_context():
--> 930 self.retrieve()
931 # Make sure that we get a last message telling us we are done
932 elapsed_time = time.time() - self._start_time
c:\users\chang\ml\lib\site-packages\sklearn\externals\joblib\parallel.py in retrieve(self)
831 try:
832 if getattr(self._backend, 'supports_timeout', False):
--> 833 self._output.extend(job.get(timeout=self.timeout))
834 else:
835 self._output.extend(job.get())
c:\users\chang\ml\lib\site-packages\sklearn\externals\joblib\_parallel_backends.py in wrap_future_result(future, timeout)
519 AsyncResults.get from multiprocessing."""
520 try:
--> 521 return future.result(timeout=timeout)
522 except LokyTimeoutError:
523 raise TimeoutError()
~\AppData\Local\Programs\Python\Python37-32\lib\concurrent\futures\_base.py in result(self, timeout)
430 raise CancelledError()
431 elif self._state == FINISHED:
--> 432 return self.__get_result()
433 else:
434 raise TimeoutError()
~\AppData\Local\Programs\Python\Python37-32\lib\concurrent\futures\_base.py in __get_result(self)
382 def __get_result(self):
383 if self._exception:
--> 384 raise self._exception
385 else:
386 return self._result
BrokenProcessPool: A task has failed to un-serialize. Please ensure that the arguments of the function are all picklable.
------------------------------------------------------------------------------
I have a second computer where the code works, but there is no virtual environment set up there.
Running cmd as administrator does not fix my problem.
I do not have my virtual environment set as an environment variable, but I do have C:\Users\chang\AppData\Local\Programs\Python\Python37-32 as an environment variable.
I suspect that I am missing a crucial step while setting up my virtual environment that leads to the PermissionError: [WinError 5] Access is denied error.
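One workaround I have seen suggested, though I have not verified it on this setup, is to run the same call under joblib's threading backend so that loky does not need to spawn worker processes at all:

# Sketch only: the same cross_val_score call as in my code below, but run
# under joblib's threading backend (no new processes are spawned).
# On older scikit-learn the import may instead be sklearn.externals.joblib.
from joblib import parallel_backend

with parallel_backend('threading'):
    score = cross_val_score(lgr_clf, X_poly, y, cv=loo,
                            scoring='accuracy', n_jobs=-1).mean()

My full notebook export is below.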
#!/usr/bin/env python
# coding: utf-8
# In[14]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sklearn.linear_model as skl_lm
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split, LeaveOneOut, KFold, cross_val_score
from sklearn.preprocessing import PolynomialFeatures
import matplotlib as mpl
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, classification_report, precision_score
from sklearn import preprocessing
from sklearn import neighbors
import statsmodels.api as sm
import statsmodels.formula.api as smf
get_ipython().run_line_magic('matplotlib', 'inline')
plt.style.use('seaborn-white')
# In[15]:
df = pd.read_csv('Default.csv', index_col = 0)
df.info()
# In[16]:
##ESTIMATE TEST ERROR. 3 SPLITS
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics
cols = ['student', 'balance', 'income']
X=df[cols]
y=df['default']
X=X.replace("Yes",1)
X=X.replace("No",0)
y=y.replace("Yes",1)
y=y.replace("No",0)
t_prop = 0.5
poly_order = np.arange(1,4) #degrees
r_state = np.arange(3) #number of splits
Z = np.zeros((poly_order.size,r_state.size))
X1, Y1 = np.meshgrid(poly_order, r_state, indexing='ij')
for (i,j),v in np.ndenumerate(Z):
    poly = PolynomialFeatures(int(X1[i,j]))
    X_poly = poly.fit_transform(X)
    X_train, X_test, y_train, y_test = train_test_split(X_poly, y, test_size=0.3)  # random_state=42
    y_train_default = (y_train == 1)
    y_test_default = (y_test == 1)
    lgr_clf = LogisticRegression(solver="lbfgs")
    lgr_clf.fit(X_train, y_train_default)
    y_train_pred = lgr_clf.predict(X_train)
    y_test_pred = lgr_clf.predict(X_test)
    Z[i,j] = metrics.accuracy_score(y_test, y_test_pred)
plt.plot(X1,Z)
plt.title('{} random splits of the data set'.format(max(r_state)+1))
plt.ylabel('Accuracy Score')
plt.ylim(.94,1)
plt.xlabel('Degree of Polynomial')
plt.xlim(1,3)
# In[17]:
##LOOCV
loo = LeaveOneOut()
loo.get_n_splits(df)
scores = list()
X = X[:2500]
y = y[:2500]
for i in poly_order:
    poly = PolynomialFeatures(i)
    X_poly = poly.fit_transform(X)
    score = cross_val_score(lgr_clf, X_poly, y, cv=loo, scoring='accuracy', n_jobs=-1).mean()
    scores.append(score)
# k-fold CV
folds = 3
elements = len(df.index)
X1, Y1 = np.meshgrid(poly_order, r_state, indexing='ij')
Z3 = np.zeros((poly_order.size,r_state.size))
for (i,j),v in np.ndenumerate(Z3):
    poly = PolynomialFeatures(X1[i,j])
    X_poly = poly.fit_transform(X)
    kf_10 = KFold(n_splits=folds, random_state=Y1[i,j])
    Z3[i,j] = cross_val_score(lgr_clf, X_poly, y, cv=kf_10, scoring='accuracy').mean()
fig, (ax1, ax2) = plt.subplots(1,2, figsize=(10,4))
# Note: the cross_val_score() method returns negative values for some scores.
# https://github.com/scikit-learn/scikit-learn/issues/2439
# Left plot
ax1.plot(poly_order, np.array(scores), '-o')
ax1.set_title('LOOCV')
# Right plot
ax2.plot(X1,Z3,'-o')
ax2.set_title('3-fold CV')
for ax in fig.axes:
    ax.set_ylabel('Mean Squared Error')
    ax.set_xlabel('Degree of Polynomial')
    ax.set_ylim(0.9,1)
    ax.set_xlim(0.5,3.5)
    #ax.set_xticks(range(1,5,2));
# In[18]:
##ESTIMATE TEST ERROR. 4 SPLITS
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics
cols = ['student', 'balance', 'income']
X=df[cols]
y=df['default']
X=X.replace("Yes",1)
X=X.replace("No",0)
y=y.replace("Yes",1)
y=y.replace("No",0)
t_prop = 0.5
poly_order = np.arange(1,4) #degrees
r_state = np.arange(4) #number of splits
Z = np.zeros((poly_order.size,r_state.size))
X1, Y1 = np.meshgrid(poly_order, r_state, indexing='ij')
for (i,j),v in np.ndenumerate(Z):
    poly = PolynomialFeatures(int(X1[i,j]))
    X_poly = poly.fit_transform(X)
    X_train, X_test, y_train, y_test = train_test_split(X_poly, y, test_size=0.3)  # random_state=42
    y_train_default = (y_train == 1)
    y_test_default = (y_test == 1)
    lgr_clf = LogisticRegression(solver="lbfgs")
    lgr_clf.fit(X_train, y_train_default)
    y_train_pred = lgr_clf.predict(X_train)
    y_test_pred = lgr_clf.predict(X_test)
    Z[i,j] = metrics.accuracy_score(y_test, y_test_pred)
plt.plot(X1,Z)
plt.title('{} random splits of the data set'.format(max(r_state)+1))
plt.ylabel('Accuracy Score')
plt.ylim(.94,1)
plt.xlabel('Degree of Polynomial')
plt.xlim(1,3)
# In[19]:
##LOOCV
loo = LeaveOneOut()
loo.get_n_splits(df)
scores = list()
X = X[:2500]
y = y[:2500]
for i in poly_order:
    poly = PolynomialFeatures(i)
    X_poly = poly.fit_transform(X)
    score = cross_val_score(lgr_clf, X_poly, y, cv=loo, scoring='accuracy', n_jobs=-1).mean()
    scores.append(score)
# k-fold CV
folds = 4
elements = len(df.index)
X1, Y1 = np.meshgrid(poly_order, r_state, indexing='ij')
Z4 = np.zeros((poly_order.size,r_state.size))
for (i,j),v in np.ndenumerate(Z4):
    poly = PolynomialFeatures(X1[i,j])
    X_poly = poly.fit_transform(X)
    kf_10 = KFold(n_splits=folds, random_state=Y1[i,j])
    Z4[i,j] = cross_val_score(lgr_clf, X_poly, y, cv=kf_10, scoring='accuracy').mean()
fig, (ax1, ax2) = plt.subplots(1,2, figsize=(10,4))
# Note: the cross_val_score() method returns negative values for some scores.
# https://github.com/scikit-learn/scikit-learn/issues/2439
# Left plot
ax1.plot(poly_order, np.array(scores), '-o')
ax1.set_title('LOOCV')
# Right plot
ax2.plot(X1,Z4,'-o')
ax2.set_title('4-fold CV')
for ax in fig.axes:
    ax.set_ylabel('Mean Squared Error')
    ax.set_xlabel('Degree of Polynomial')
    ax.set_ylim(0.9,1)
    ax.set_xlim(0.5,3.5)
    #ax.set_xticks(range(1,5,2));
# In[21]:
##ESTIMATE TEST ERROR. 5 SPLITS
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics
cols = ['student', 'balance', 'income']
X=df[cols]
y=df['default']
X=X.replace("Yes",1)
X=X.replace("No",0)
y=y.replace("Yes",1)
y=y.replace("No",0)
t_prop = 0.5
poly_order = np.arange(1,4) #degrees
r_state = np.arange(5) #number of splits
Z = np.zeros((poly_order.size,r_state.size))
X1, Y1 = np.meshgrid(poly_order, r_state, indexing='ij')
for (i,j),v in np.ndenumerate(Z):
    poly = PolynomialFeatures(int(X1[i,j]))
    X_poly = poly.fit_transform(X)
    X_train, X_test, y_train, y_test = train_test_split(X_poly, y, test_size=0.3)  # random_state=42
    y_train_default = (y_train == 1)
    y_test_default = (y_test == 1)
    lgr_clf = LogisticRegression(solver="lbfgs")
    lgr_clf.fit(X_train, y_train_default)
    y_train_pred = lgr_clf.predict(X_train)
    y_test_pred = lgr_clf.predict(X_test)
    Z[i,j] = metrics.accuracy_score(y_test, y_test_pred)
plt.plot(X1,Z)
plt.title('{} random splits of the data set'.format(max(r_state)+1))
plt.ylabel('Accuracy Score')
plt.ylim(.94,1)
plt.xlabel('Degree of Polynomial')
plt.xlim(1,3)
# In[22]:
##LOOCV
loo = LeaveOneOut()
loo.get_n_splits(df)
scores = list()
X = X[:2500]
y = y[:2500]
for i in poly_order:
    poly = PolynomialFeatures(i)
    X_poly = poly.fit_transform(X)
    score = cross_val_score(lgr_clf, X_poly, y, cv=loo, scoring='accuracy', n_jobs=-1).mean()
    scores.append(score)
# k-fold CV
folds = 5
elements = len(df.index)
X1, Y1 = np.meshgrid(poly_order, r_state, indexing='ij')
Z5 = np.zeros((poly_order.size,r_state.size))
for (i,j),v in np.ndenumerate(Z5):
    poly = PolynomialFeatures(X1[i,j])
    X_poly = poly.fit_transform(X)
    kf_10 = KFold(n_splits=folds, random_state=Y1[i,j])
    Z5[i,j] = cross_val_score(lgr_clf, X_poly, y, cv=kf_10, scoring='accuracy').mean()
fig, (ax1, ax2) = plt.subplots(1,2, figsize=(10,4))
# Note: the cross_val_score() method returns negative values for some scores.
# https://github.com/scikit-learn/scikit-learn/issues/2439
# Left plot
ax1.plot(poly_order, np.array(scores), '-o')
ax1.set_title('LOOCV')
# Right plot
ax2.plot(X1,Z5,'-o')
ax2.set_title('5-fold CV')
for ax in fig.axes:
    ax.set_ylabel('Mean Squared Error')
    ax.set_xlabel('Degree of Polynomial')
    ax.set_ylim(0.9,1)
    ax.set_xlim(0.5,3.5)
    #ax.set_xticks(range(1,5,2));
# In[23]:
#Analysis
#Comparing LOOCV to the random splits, LOOCV is closest to a linear
#model with polynomial degree one. The same holds when comparing it
#to the k-fold CV. In addition, the number of folds does not cause a
#large deviation compared to LOOCV. This supports the statement from
#class that a large or small number of folds does not necessarily
#make the model better.
# In[ ]:
Additional Information/Updates:
2/3/2020
If anybody comes across this, there is a more active GitHub thread where someone has mentioned a new possible fix, but I am unsure yet whether it fixes the problem mentioned here. It is related to how the data is read in, but I doubt that this is the solution.
Small update: I have yet to revisit this problem, but I recently encountered a similar runtime error in a different program (I am unsure whether it was exactly the same error, or whether "runtime error" is even the right name for it). I realized my Python install was 32-bit for some unknown reason. Upgrading to 64-bit fixed that problem. I have yet to try this on the old code posted here, and I also still need to check whether the Python on my other machine is 32-bit.
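For anyone who wants to verify which build they are running, a quick standard-library check:

import struct
import sys

print(sys.version)               # full interpreter version string
print(struct.calcsize("P") * 8)  # pointer size in bits: prints 32 or 64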

Related

KNN Python implementation

This is what shows when I try running my code:
FutureWarning: Unlike other reduction functions (e.g. skew, kurtosis), the default behavior of mode typically preserves the axis it acts along. In SciPy 1.11.0, this behavior will change: the default value of keepdims will become False, the axis over which the statistic is taken will be eliminated, and the value None will no longer be accepted. Set keepdims to True or False to avoid this warning.
lab = mode(labels)
This is my Python code; I am having difficulty finding a suitable solution:
# Importing the required modules
import numpy as np
from scipy.stats import mode

# Euclidean distance
def eucledian(p1, p2):
    dist = np.sqrt(np.sum((p1 - p2) ** 2))
    return dist

# Function to calculate KNN
def predict(x_train, y, x_input, k):
    op_labels = []
    # Loop through the datapoints to be classified
    for item in x_input:
        # Array to store distances
        point_dist = []
        # Loop through each training datapoint
        for j in range(len(x_train)):
            distances = eucledian(np.array(x_train[j, :]), item)
            # Calculating the distance
            point_dist.append(distances)
        point_dist = np.array(point_dist)
        # Sorting the array while preserving the index
        # Keeping the first K datapoints
        dist = np.argsort(point_dist)[:k]
        # Labels of the K datapoints from above
        labels = y[dist]
        # Majority voting
        lab = mode(labels)
        lab = lab.mode[0]
        op_labels.append(lab)
    return op_labels
# Importing the required modules
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_iris
from numpy.random import randint

# Loading the data
iris = load_iris()
# Store the feature matrix in X
X = iris.data
# Store the target vector in y
y = iris.target

# Creating the training data
train_idx = randint(0, 150, 100)
X_train = X[train_idx]
y_train = y[train_idx]

# Creating the testing data
test_idx = randint(0, 150, 50)  # taking 50 random samples
X_test = X[test_idx]
y_test = y[test_idx]

# Applying our function
y_pred = predict(X_train, y_train, X_test, 7)

# Checking the accuracy
accuracy_score(y_test, y_pred)
I am expecting a prediction/accuracy score to be printed as the output.
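As for the FutureWarning, the message itself names the fix: pass keepdims explicitly. A minimal sketch of the voting step with that change, assuming SciPy >= 1.9 (where mode() accepts keepdims) and using `labels` from the loop above:

from scipy.stats import mode

# keepdims=True preserves the old output shape, so the existing
# `lab.mode[0]` indexing keeps working and the warning disappears.
lab = mode(labels, keepdims=True)
lab = lab.mode[0]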
KNN can be done like this.
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"
# Assign colum names to the dataset
names = ['sepal-length', 'sepal-width', 'petal-length', 'petal-width', 'Class']
# Read dataset to pandas dataframe
dataset = pd.read_csv(url, names=names)
dataset.head()
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, 4].values
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20)
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)
from sklearn.neighbors import KNeighborsClassifier
classifier = KNeighborsClassifier(n_neighbors=5, metric='minkowski')
classifier.fit(X_train, y_train)
y_pred = classifier.predict(X_test)
from sklearn.metrics import classification_report, confusion_matrix
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))
# Result:
                 precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        13
Iris-versicolor       1.00      0.89      0.94         9
 Iris-virginica       0.89      1.00      0.94         8

       accuracy                           0.97        30
      macro avg       0.96      0.96      0.96        30
   weighted avg       0.97      0.97      0.97        30
error = []
# Calculating error for K values between 1 and 40
for i in range(1, 40):
    knn = KNeighborsClassifier(n_neighbors=i)
    knn.fit(X_train, y_train)
    pred_i = knn.predict(X_test)
    error.append(np.mean(pred_i != y_test))
plt.figure(figsize=(12, 6))
plt.plot(range(1, 40), error, color='red', linestyle='dashed', marker='o',
         markerfacecolor='blue', markersize=10)
plt.title('Error Rate K Value')
plt.xlabel('K Value')
plt.ylabel('Mean Error')

PyPlot error "X and Y must be same size", everything I've found online isn't working

I'm trying to create a linear regression model in Scikit-Learn, but I've encountered a problem: it says that x and y are not the same size. I am using Google's "california housing" dataset. Here's the code:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
dataset = pd.read_csv('/content/sample_data/california_housing_train.csv')
x = dataset.iloc[:, :-2].values
y = dataset.iloc[:, :-1].values
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 1/3)
from sklearn.linear_model import LinearRegression
lr = LinearRegression()
lr.fit(x_train, y_train)
y_pred = lr.predict(x_test)
plt.scatter(x_train, y_train, color = "red")
plt.plot(x_train, lr.predict(x_train), color = "green")
plt.title("Income vs Home Value (Training set)")
plt.xlabel("Income")
plt.ylabel("Home Value")
plt.show()
plt.scatter(x_test, y_test, color = "red")
plt.plot(x_train, lr.predict(x_train), color = "green")
plt.title("Income vs Home Value (Testing set)")
plt.xlabel("Income")
plt.ylabel("Home value")
plt.show()
Error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-47-95095200e54b> in <module>()
18 y_pred = lr.predict(x_test)
19
---> 20 plt.scatter(x_train[0], y_train[:], color = "red")
21 plt.plot(x_train, lr.predict(x_train), color = "green")
22 plt.title("Income vs Home Value (Training set)")
3 frames
/usr/local/lib/python3.7/dist-packages/matplotlib/axes/_axes.py in scatter(self, x, y, s, c, marker, cmap, norm, vmin, vmax, alpha, linewidths, verts, edgecolors, plotnonfinite, **kwargs)
4389 y = np.ma.ravel(y)
4390 if x.size != y.size:
-> 4391 raise ValueError("x and y must be the same size")
4392
4393 if s is None:
ValueError: x and y must be the same size
I have no idea why. I've tried everything in other posts; according to what I've found there, it's because one of x or y is 2-D and the other is 1-D. But those fixes aren't working.
Look at the dimensions of your x & y variables:
[ins] In [34]: x.shape
Out[34]: (17000, 7)
[ins] In [35]: y.shape
Out[35]: (17000, 8)
The y variable should be the target variable, the home price:
y = dataset.iloc[:,-1].values
Your x-variable definition leaves out the median_income, which is what you are trying to plot, so here is an x matrix that includes the income variable:
x = dataset.iloc[:, :-1].values
With y defined as above it is now 1-dimensional; the x matrix has 8 variables in it, the last of which (index 7) is median_income. To plot it:
plt.scatter(x_train[:,7], y_train, color = "red")
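Putting it together, a consolidated sketch of the fix (the /content path is Colab-specific, as in the question, and column index 7 is assumed to be median_income per the standard column order of that file; the predictions are drawn as a second scatter since x has 8 features and a single line plot against one column would not be meaningful):

import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

dataset = pd.read_csv('/content/sample_data/california_housing_train.csv')
x = dataset.iloc[:, :-1].values   # all 8 feature columns, income included
y = dataset.iloc[:, -1].values    # 1-D target: median_house_value

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=1/3)

lr = LinearRegression()
lr.fit(x_train, y_train)

# Plot the income column (index 7) against the target; both are 1-D arrays
# of the same length now, so scatter() no longer complains.
plt.scatter(x_train[:, 7], y_train, color="red", s=8)
plt.scatter(x_train[:, 7], lr.predict(x_train), color="green", s=8)
plt.title("Income vs Home Value (Training set)")
plt.xlabel("Income")
plt.ylabel("Home Value")
plt.show()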

How do I use the MLP score function? Error: shapes (295,1) and (7,450) not aligned: 1 (dim 1) != 7 (dim 0)

I have recently begun coding deep neural networks in Python, and I have been stuck on this problem for weeks. I have checked other similar questions but could not grasp the solution.
I have a feed-forward neural network and I am trying to obtain the R^2 value for my model. I have a dataframe of 1031 rows by 9 columns. I am fitting the first 7 columns (storm characteristics) against the water elevation given in the 8th or 9th column; 'h' is the column heading of the target that I am fitting against the first 7 columns.
See csv data at this link.
### Load dependencies
import pandas as pd
import numpy as np
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import accuracy_score, mean_squared_error
from sklearn.model_selection import cross_val_score
from sklearn.metrics import r2_score
data = pd.read_csv(r'C:\Users\SM\Desktop\dataframe.csv')

### Define MLP Hyper-parameters
n = 20    # Number of layers
p = 450   # Number of perceptrons in each layer
i = 1     # Initializing iteration count
MLPsize = []

### While loop to create tuple for MLP hyperparameter input
while i <= n:
    MLPsize.append(p)
    i += 1

Model = MLPRegressor(hidden_layer_sizes=tuple(MLPsize),
                     activation='logistic', solver='adam', alpha=0.0001,
                     batch_size='auto', learning_rate='adaptive', learning_rate_init=0.001,
                     power_t=0.5, max_iter=5000, shuffle=False, random_state=None, tol=0.0001,
                     verbose=False, warm_start=False, momentum=0.9, nesterovs_momentum=True,
                     early_stopping=False, validation_fraction=0.5,
                     beta_1=0.9, beta_2=0.999, epsilon=1e-08, n_iter_no_change=10)
### Define function to run train-test simulation
msk = np.random.rand(len(data)) < 0.7
Train = data[msk]
Test = data[~msk]
Train = Train.reset_index(drop=True)
Test = Test.reset_index(drop=True)

def RunModel(h):
    global Train_pred
    global Test_pred
    global Train_true
    global Test_true
    global Train_error
    global Test_error
    Train_error = []
    Test_error = []
    Model.fit(Train.iloc[:, :7], Train[h])
    Train_pred = Model.predict(Train.iloc[:, :7])
    Train_error.append(mean_squared_error(Train[h], Train_pred))
    Train_true = np.array(Train[h]).reshape(-1, 1)
    Train_pred = Train_pred.reshape(-1, 1)
    Test_pred = Model.predict(Test.iloc[:, :7])
    Test_error.append(mean_squared_error(Test[h], Test_pred))
    Test_true = np.array(Test[h]).reshape(-1, 1)
    Test_pred = Test_pred.reshape(-1, 1)
    print(mean_squared_error(Train_pred, Train_true))
    print(Model.score(Test_pred, Test_true))

RunModel('11939')
And I get an error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-23-01acc869ac62> in <module>
----> 1 Model.score(Test_pred, Test_true)
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\base.py in score(self, X, y, sample_weight)
406 from .metrics import r2_score
407 from .metrics.regression import _check_reg_targets
--> 408 y_pred = self.predict(X)
409 # XXX: Remove the check in 0.23
410 y_type, _, _, _ = _check_reg_targets(y, y_pred, None)
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\utils\extmath.py in safe_sparse_dot(a, b, dense_output)
140 return ret
141 else:
--> 142 return np.dot(a, b)
143
144
ValueError: shapes (295,1) and (7,450) not aligned: 1 (dim 1) != 7 (dim 0)
Please help. Any detailed explanation would be appreciated.
The line that raises the error is the following:
print(Model.score(Test_pred, Test_true))
This is not valid since .score() method of the MLPRegressor takes as inputs (X,y). Read more here: https://scikit-learn.org/stable/modules/generated/sklearn.neural_network.MLPRegressor.html#sklearn.neural_network.MLPRegressor.score
So, if you want to estimate the R² coefficient of the model, you need to pass Xtest and ytest, which in your case are Test.iloc[:, :7] and Test_true.
### Load dependencies
import pandas as pd
import numpy as np
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import accuracy_score, mean_squared_error
from sklearn.model_selection import cross_val_score
from sklearn.metrics import r2_score
data = pd.read_csv(r'C:\Users\SM\Desktop\dataframe.csv')

### Define MLP Hyper-parameters
n = 20    # Number of layers
p = 450   # Number of perceptrons in each layer
i = 1     # Initializing iteration count
MLPsize = []

### While loop to create tuple for MLP hyperparameter input
while i <= n:
    MLPsize.append(p)
    i += 1

Model = MLPRegressor(hidden_layer_sizes=tuple(MLPsize),
                     activation='logistic', solver='adam', alpha=0.0001,
                     batch_size='auto', learning_rate='adaptive', learning_rate_init=0.001,
                     power_t=0.5, max_iter=5000, shuffle=False, random_state=None, tol=0.0001,
                     verbose=False, warm_start=False, momentum=0.9, nesterovs_momentum=True,
                     early_stopping=False, validation_fraction=0.5,
                     beta_1=0.9, beta_2=0.999, epsilon=1e-08, n_iter_no_change=10)
### Define function to run train-test simulation
msk = np.random.rand(len(data)) < 0.7
Train = data[msk]
Test = data[~msk]
Train = Train.reset_index(drop=True)
Test = Test.reset_index(drop=True)

def RunModel(h):
    global Train_pred
    global Test_pred
    global Train_true
    global Test_true
    global Train_error
    global Test_error
    Train_error = []
    Test_error = []
    Model.fit(Train.iloc[:, :7], Train[h])
    Train_pred = Model.predict(Train.iloc[:, :7])
    Train_error.append(mean_squared_error(Train[h], Train_pred))
    Train_true = np.array(Train[h]).reshape(-1, 1)
    Train_pred = Train_pred.reshape(-1, 1)
    Test_pred = Model.predict(Test.iloc[:, :7])
    Test_error.append(mean_squared_error(Test[h], Test_pred))
    Test_true = np.array(Test[h]).reshape(-1, 1)
    Test_pred = Test_pred.reshape(-1, 1)
    print(mean_squared_error(Train_pred, Train_true))
    print(Model.score(Test.iloc[:, :7], Test_true))

RunModel('11939')
Prints:
0.5131277608869395
-0.02165748764016695
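Since Test_pred and Test_true are already stored by RunModel, the same value can be cross-checked directly with r2_score (already imported at the top of the script). A negative R² simply means the model does worse than always predicting the mean of the test targets:

from sklearn.metrics import r2_score

# Should print the same value as Model.score(Test.iloc[:, :7], Test_true)
print(r2_score(Test_true, Test_pred))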

tensorflow internal error that fails to create session

I'm trying to implement a simple neural network using TensorFlow with GPU in a Jupyter Notebook, but it fails to create a session every time. I have traced through the code many times, reduced the number of iterations to 10, and also reduced the amount of training and test data, just to test whether it was a problem of computational power. My network has two hidden layers; the first contains 3 neurons and the second contains 2.
Here is my code:
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import numpy as np
import math
import tensorflow as tf
from tensorflow.python.framework import ops
from preprocessing import load_dataset
def model(X_train, Y_train, X_test, Y_test, learning_rate=0.0001):
    """
    Arguments:
    X_train -- training set, of shape (input size, number of training examples)
    Y_train -- training labels, of shape (output size, number of training examples)
    X_test -- test set, of shape (input size, number of test examples)
    Y_test -- test labels, of shape (output size, number of test examples)
    learning_rate -- learning rate of the optimization
    Returns:
    parameters -- parameters learnt by the model.
    """
    ops.reset_default_graph()
    X, Y = create_placeholders()
    parameters = initialize_parameters()
    Z3 = forward_propagation(X, parameters)
    cost = compute_cost(Z3, Y)
    print("X = " + str(X))
    print("Y = " + str(Y))
    print("Z3 = " + str(Z3))
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate).minimize(cost)
    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init)
        for i in range(10):
            print(sess.run([optimizer, cost], feed_dict={X: X_train, Y: Y_train}))
        parameters = sess.run(parameters)
        print("Parameters have been trained!")
        correct_prediction = tf.equal(tf.argmax(Z3), tf.argmax(Y))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
        print("Train Accuracy:", accuracy.eval({X: X_train, Y: Y_train}))
        print("Test Accuracy:", accuracy.eval({X: X_test, Y: Y_test}))
    return parameters

X_train, Y_train, X_test, Y_test = load_dataset()
parameters = model(X_train, Y_train, X_test, Y_test)
And here are the functions that I'm calling:
def create_placeholders():
    X = tf.placeholder(dtype=tf.float32, shape=(28755648, 5), name="X")
    Y = tf.placeholder(dtype=tf.float32, shape=(1, 5), name="Y")
    return X, Y

def initialize_parameters():
    W1 = tf.get_variable("W1", [3, 28755648], initializer=tf.contrib.layers.xavier_initializer(seed=0))
    b1 = tf.get_variable("b1", [3, 1], initializer=tf.zeros_initializer())
    W2 = tf.get_variable("W2", [2, 3], initializer=tf.contrib.layers.xavier_initializer(seed=0))
    b2 = tf.get_variable("b2", [2, 1], initializer=tf.zeros_initializer())
    W3 = tf.get_variable("W3", [1, 2], initializer=tf.contrib.layers.xavier_initializer(seed=0))
    b3 = tf.get_variable("b3", [1, 1], initializer=tf.zeros_initializer())
    parameters = {"W1": W1,
                  "b1": b1,
                  "W2": W2,
                  "b2": b2,
                  "W3": W3,
                  "b3": b3}
    return parameters

def forward_propagation(X, parameters):
    W1 = parameters['W1']
    b1 = parameters['b1']
    W2 = parameters['W2']
    b2 = parameters['b2']
    W3 = parameters['W3']
    b3 = parameters['b3']
    Z1 = tf.add(tf.matmul(W1, X), b1)
    A1 = tf.nn.relu(Z1)
    Z2 = tf.add(tf.matmul(W2, A1), b2)
    A2 = tf.nn.relu(Z2)
    Z3 = tf.add(tf.matmul(W3, A2), b3)
    return Z3

def compute_cost(Z3, Y):
    logits = Z3
    labels = Y
    cost = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=labels))
    return cost
And every time I run the code I got the following error:
---------------------------------------------------------------------------
InternalError Traceback (most recent call last)
<ipython-input-42-9ce12ddb96d9> in <module>()
----> 1 parameters= model(X_train, Y_train, X_test, Y_test)
<ipython-input-38-ab5d84d97720> in model(X_train, Y_train, X_test, Y_test, learning_rate)
42
43 # Start the session to compute the tensorflow graph
---> 44 with tf.Session() as sess:
45
46 # Run the initialization
C:\Users\Chaymae\Anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\client\session.py in __init__(self, target, graph, config)
1480
1481 """
-> 1482 super(Session, self).__init__(target, graph, config=config)
1483 # NOTE(mrry): Create these on first `__enter__` to avoid a reference cycle.
1484 self._default_graph_context_manager = None
C:\Users\Chaymae\Anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\client\session.py in __init__(self, target, graph, config)
620 # pylint: enable=protected-access
621 else:
--> 622 self._session = tf_session.TF_NewDeprecatedSession(opts, status)
623 finally:
624 tf_session.TF_DeleteSessionOptions(opts)
C:\Users\Chaymae\Anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\framework\errors_impl.py in __exit__(self, type_arg, value_arg, traceback_arg)
471 None, None,
472 compat.as_text(c_api.TF_Message(self.status.status)),
--> 473 c_api.TF_GetCode(self.status.status))
474 # Delete the underlying status object from memory otherwise it stays alive
475 # as there is a reference to status from this from the traceback due to
InternalError: Failed to create session.
On the other hand, I have tried creating a new session and running a simple TensorFlow graph, and it worked well.
If anyone could help with my case I would be pleased.
Putting tf.reset_default_graph() before creating the tf.Session() should solve the problem.
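A minimal sketch of that ordering (TF 1.x, as in the question); if even this fails, the GPU is most likely still held by another kernel or an earlier session in the notebook:

import tensorflow as tf  # TF 1.x API

tf.reset_default_graph()      # clear any graph left over from earlier cells
x = tf.constant(1.0)
with tf.Session() as sess:    # session is created after the reset
    print(sess.run(x))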

Neural Network regression in tensorflow: error in code

I don't understand why my code won't run. I started with the TensorFlow tutorial to classify the images in the MNIST data set using a single-layer feedforward neural net, then modified the code to create a multilayer perceptron that maps 37 inputs to 1 output. The input and output training data are loaded from Matlab data files (.mat).
Here is my code:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from scipy.io import loadmat
%matplotlib inline
import tensorflow as tf
from tensorflow.contrib import learn
import sklearn
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from warnings import filterwarnings
filterwarnings('ignore')
sns.set_style('white')
from sklearn import datasets
from sklearn.preprocessing import scale
from sklearn.cross_validation import train_test_split
from sklearn.datasets import make_moons
X = np.array(loadmat("Data/DataIn.mat")['TrainingDataIn'])
Y = np.array(loadmat("Data/DataOut.mat")['TrainingDataOut'])
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=.5)
total_len = X_train.shape[0]
# Parameters
learning_rate = 0.001
training_epochs = 500
batch_size = 10
display_step = 1
dropout_rate = 0.9
# Network Parameters
n_hidden_1 = 19 # 1st layer number of features
n_hidden_2 = 26 # 2nd layer number of features
n_input = X_train.shape[1]
n_classes = 1
# tf Graph input
X = tf.placeholder("float32", [None, 37])
Y = tf.placeholder("float32", [None])
def multilayer_perceptron(X, weights, biases):
    # Hidden layer with ReLU activation
    layer_1 = tf.add(tf.matmul(X, weights['h1']), biases['b1'])
    layer_1 = tf.nn.relu(layer_1)
    layer_2 = tf.add(tf.matmul(layer_1, weights['h2']), biases['b2'])
    layer_2 = tf.nn.relu(layer_2)
    # Output layer with linear activation
    out_layer = tf.matmul(layer_2, weights['out']) + biases['out']
    return out_layer
# Store layers' weights & biases
weights = {
    'h1': tf.Variable(tf.random_normal([n_input, n_hidden_1], 0, 0.1)),
    'h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2], 0, 0.1)),
    'out': tf.Variable(tf.random_normal([n_hidden_2, n_classes], 0, 0.1))
}
biases = {
    'b1': tf.Variable(tf.random_normal([n_hidden_1], 0, 0.1)),
    'b2': tf.Variable(tf.random_normal([n_hidden_2], 0, 0.1)),
    'out': tf.Variable(tf.random_normal([n_classes], 0, 0.1))
}
# Construct model
pred = multilayer_perceptron(X, weights, biases)
tf.shape(pred)
tf.shape(Y)
print("Prediction matrix:", pred)
print("Output matrix:", Y)
# Define loss and optimizer
cost = tf.reduce_mean(tf.square(pred-Y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
# Launch the graph
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # Training cycle
    for epoch in range(training_epochs):
        avg_cost = 0.
        total_batch = int(total_len/batch_size)
        print(total_batch)
        # Loop over all batches
        for i in range(total_batch-1):
            batch_x = X_train[i*batch_size:(i+1)*batch_size]
            batch_y = Y_train[i*batch_size:(i+1)*batch_size]
            # Run optimization op (backprop) and cost op (to get loss value)
            _, c, p = sess.run([optimizer, cost, pred], feed_dict={X: batch_x,
                                                                   Y: batch_y})
            # Compute average loss
            avg_cost += c / total_batch
        # Sample prediction
        label_value = batch_y
        estimate = p
        err = label_value - estimate
        print("num batch:", total_batch)
        # Display logs per epoch step
        if epoch % display_step == 0:
            print("Epoch:", '%04d' % (epoch+1), "cost=",
                  "{:.9f}".format(avg_cost))
            print("[*]----------------------------")
            for i in range(5):
                print("label value:", label_value[i],
                      "estimated value:", estimate[i])
            print("[*]============================")
    print("Optimization Finished!")
    # Test model
    correct_prediction = tf.equal(tf.argmax(pred), tf.argmax(Y))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    print("Accuracy:", accuracy.eval({X: X_test, Y: Y_test}))
When I run the code I get this error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-4-6b8af9192775> in <module>()
93 # Run optimization op (backprop) and cost op (to get loss value)
94 _, c, p = sess.run([optimizer, cost, pred], feed_dict={X: batch_x,
---> 95 Y: batch_y})
96 # Compute average loss
97 avg_cost += c / total_batch
~\AppData\Local\Continuum\Anaconda3\envs\ann\lib\site-packages\tensorflow\python\client\session.py in run(self, fetches, feed_dict, options, run_metadata)
787 try:
788 result = self._run(None, fetches, feed_dict, options_ptr,
--> 789 run_metadata_ptr)
790 if run_metadata:
791 proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)
~\AppData\Local\Continuum\Anaconda3\envs\ann\lib\site-packages\tensorflow\python\client\session.py in _run(self, handle, fetches, feed_dict, options, run_metadata)
973 'Cannot feed value of shape %r for Tensor %r, '
974 'which has shape %r'
--> 975 % (np_val.shape, subfeed_t.name, str(subfeed_t.get_shape())))
976 if not self.graph.is_feedable(subfeed_t):
977 raise ValueError('Tensor %s may not be fed.' % subfeed_t)
ValueError: Cannot feed value of shape (10, 1) for Tensor 'Placeholder_7:0', which has shape '(?,)'
I've encountered this problem before. The difference is that a tensor of shape (10, 1) looks like [[1], [2], ..., [10]], while a tensor of shape (10,) looks like [1, 2, ..., 10].
You should be able to fix it by changing the line
Y = tf.placeholder("float32", [None])
to:
Y = tf.placeholder("float32", [None, 1])
