I'm training polynomial regressions over a series of degrees, and attempting to use predict() on a list of inputs.
inputs = np.linspace(0, 10, 100).reshape(-1, 1)
for i, deg in enumerate([1, 3, 6, 9]):
    poly = PolynomialFeatures(degree=deg)
    X_poly = poly.fit_transform(X_train.reshape(-1, 1))
    linreg = LinearRegression().fit(X_poly, y_train)
    print(linreg.predict(inputs))
When I call predict(), I get the following traceback:
ValueError Traceback (most recent call last)
<ipython-input-5-4100ae3f3ba3> in <module>()
13 return
14
---> 15 answer_one()
<ipython-input-5-4100ae3f3ba3> in answer_one()
9 X_poly = PolynomialFeatures(degree=deg).fit_transform(X_train.reshape(-1,1))
10 linreg = LinearRegression().fit(X_poly, y_train)
---> 11 print(linreg.predict(inputs))
12 # print(linreg.score(X_poly, y_train))
13 return
/opt/conda/lib/python3.6/site-packages/sklearn/linear_model/base.py in predict(self, X)
266 Returns predicted values.
267 """
--> 268 return self._decision_function(X)
269
270 _preprocess_data = staticmethod(_preprocess_data)
/opt/conda/lib/python3.6/site-packages/sklearn/linear_model/base.py in _decision_function(self, X)
251 X = check_array(X, accept_sparse=['csr', 'csc', 'coo'])
252 return safe_sparse_dot(X, self.coef_.T,
--> 253 dense_output=True) + self.intercept_
254
255 def predict(self, X):
/opt/conda/lib/python3.6/site-packages/sklearn/utils/extmath.py in safe_sparse_dot(a, b, dense_output)
187 return ret
188 else:
--> 189 return fast_dot(a, b)
190
191
ValueError: shapes (100,1) and (2,) not aligned: 1 (dim 1) != 2 (dim 0)
The (100,1) shape is clearly from the inputs array, but I'm not sure which object has the (2,) shape.
When you train the regressor on polynomial features:
X_poly = poly.fit_transform(X_train.reshape(-1,1))
you need to make sure the prediction uses the same polynomial features. As written,
print(linreg.predict(inputs))
fails because inputs still has a single raw column while the model was fitted on the expanded ones (the (2,) in the error is linreg.coef_ from the degree-1 fit: a bias column plus x, i.e. two features). The inputs have to be transformed with the same fitted poly object:
inputs = poly.transform(inputs)
print(linreg.predict(inputs))
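Putting it together, a minimal sketch of the corrected loop (assuming X_train and y_train exist as in the question):
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

inputs = np.linspace(0, 10, 100).reshape(-1, 1)
for deg in [1, 3, 6, 9]:
    poly = PolynomialFeatures(degree=deg)
    # fit_transform on the training data, plain transform on the prediction inputs
    X_poly = poly.fit_transform(X_train.reshape(-1, 1))
    linreg = LinearRegression().fit(X_poly, y_train)
    print(linreg.predict(poly.transform(inputs)))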
I tried to run the code below, but I received the error shown further down. I have a problem getting the 'train_score' and 'test_score'. I would be happy if you could advise me how to fix this error.
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

mse_train1 = []
mse_test1 = []
num_trees1 = []
train_scores, test_scores = list(), list()
model = RandomForestRegressor(n_estimators=1, min_samples_leaf=7, n_jobs=-1, oob_score=True, random_state=0)
i = 0
for iter in range(10):
    num_trees1.append(iter)
    i += 1
    model.fit(train_set_RF, train_set_pred)
    y_train_predicted = model.predict(train_set_RF)
    train_score = model.score(train_set_pred, y_train_predicted)
    train_scores.append(train_score)
    y_test_predicted = model.predict(test_set_RF)
    test_score = model.score(test_set_pred, y_test_predicted)
    test_scores.append(test_score)
    mse_train = mean_squared_error(train_set_pred, y_train_predicted)
    mse_train1.append(mse_train)
    mse_test = mean_squared_error(test_set_pred, y_test_predicted)
    mse_test1.append(mse_test)
    print("Iteration: {} Train mse: {} Test mse: {}".format(iter, mse_train, mse_test))
    model.n_estimators += 1
print(train_scores)
print(test_scores)
print(mse_train1)
print(mse_test1)
Here is more detail about the error I got:
ValueError                                Traceback (most recent call last)
<ipython-input-17-ff545aa1896c> in <module>
     19 y_train_predicted = model.predict(train_set_RF)
     20 #y_train_predicted = np.nan_to_num(y_train_predicted.astype(np.float32))
---> 21 train_score = model.score(train_set_pred, y_train_predicted)
     22
     23 #train_acc = accuracy_score(train_set_pred, y_train_predicted)
~\anaconda3\lib\site-packages\sklearn\base.py in score(self, X, y, sample_weight)
    549
    550         from .metrics import r2_score
--> 551         y_pred = self.predict(X)
    552         return r2_score(y, y_pred, sample_weight=sample_weight)
    553
~\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py in predict(self, X)
    781         check_is_fitted(self)
    782         # Check data
--> 783         X = self._validate_X_predict(X)
    784
    785         # Assign chunk of trees to jobs
~\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py in _validate_X_predict(self, X)
    419         check_is_fitted(self)
    420
--> 421         return self.estimators_[0]._validate_X_predict(X, check_input=True)
    422
    423     @property
~\anaconda3\lib\site-packages\sklearn\tree\_classes.py in _validate_X_predict(self, X, check_input)
    386         """Validate X whenever one tries to predict, apply, predict_proba"""
    387         if check_input:
--> 388             X = check_array(X, dtype=DTYPE, accept_sparse="csr")
    389             if issparse(X) and (X.indices.dtype != np.intc or
    390                                 X.indptr.dtype != np.intc):
~\anaconda3\lib\site-packages\sklearn\utils\validation.py in inner_f(*args, **kwargs)
     71                           FutureWarning)
     72         kwargs.update({k: arg for k, arg in zip(sig.parameters, args)})
---> 73         return f(**kwargs)
     74     return inner_f
     75
~\anaconda3\lib\site-packages\sklearn\utils\validation.py in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, estimator)
    622             "Reshape your data either using array.reshape(-1, 1) if "
    623             "your data has a single feature or array.reshape(1, -1) "
--> 624             "if it contains a single sample.".format(array))
    625
    626     # in the future np.flexible dtypes will be handled like object dtypes
ValueError: Expected 2D array, got 1D array instead:
array=[0.3119313  0.29728386 0.29309732 ... 0.30558413 0.29317933 0.29755104].
Reshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.
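The traceback points at model.score(train_set_pred, y_train_predicted). In scikit-learn, score(X, y) expects the feature matrix first and the true targets second, so the 1-D target array ends up being validated as X, which is what triggers the reshape complaint. A minimal sketch of the corrected calls, assuming the variable names from the question:
train_score = model.score(train_set_RF, train_set_pred)  # R^2 of the model on training features vs. training targets
test_score = model.score(test_set_RF, test_set_pred)     # R^2 on the test split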
I'm trying to make a fairly simple gradient descent implementation, but I get an error when I execute the function, pointing at the np.dot call that computes the predicted value at the beginning of each loop. It gives me "ValueError: Length of passed values is 1445, index implies 1." This is despite the dot product shapes being right, as it's (1445, 4) * (4, 1). The loop makes its first iteration successfully, then that error is thrown.
Here's the code:
def gredientDescent(inputs, outputs, learning_rate):
    weights = np.zeros((4, 1))
    bias = 0
    for i in range(num_observations):
        print(weights)
        predicted = np.dot(inputs, weights) + bias
        deltas = predicted - outputs
        cost = np.sum(deltas ** 2) / num_observations
        dw = np.dot(inputs.T, deltas)
        db = np.sum(deltas)
        weights = weights - (learning_rate * dw)
        bias = bias - (learning_rate * db)
    print(weights)

gredientDescent(inputs, outputs, 0.001)
and the error that comes up:
ValueError Traceback (most recent call last)
<ipython-input-177-5517d5583095> in <module>
38
39
---> 40 gredientDescent(inputs,outputs, 0.001)
<ipython-input-177-5517d5583095> in gredientDescent(inputs, outputs, learning_rate)
11
12
---> 13 predicted = np.dot(inputs, weights) + bias
14
15
C:\anaconda3\envs\py3tf2\lib\site-packages\pandas\core\series.py in __array_ufunc__(self, ufunc, method, *inputs, **kwargs)
634 # for binary ops, use our custom dunder methods
635 result = ops.maybe_dispatch_ufunc_to_dunder_op(
--> 636 self, ufunc, method, *inputs, **kwargs
637 )
638 if result is not NotImplemented:
pandas\_libs\ops_dispatch.pyx in pandas._libs.ops_dispatch.maybe_dispatch_ufunc_to_dunder_op()
C:\anaconda3\envs\py3tf2\lib\site-packages\pandas\core\ops\common.py in new_method(self, other)
62 other = item_from_zerodim(other)
63
---> 64 return method(self, other)
65
66 return new_method
C:\anaconda3\envs\py3tf2\lib\site-packages\pandas\core\ops\__init__.py in wrapper(left, right)
503 result = arithmetic_op(lvalues, rvalues, op, str_rep)
504
--> 505 return _construct_result(left, result, index=left.index, name=res_name)
506
507 wrapper.__name__ = op_name
C:\anaconda3\envs\py3tf2\lib\site-packages\pandas\core\ops\__init__.py in _construct_result(left, result, index, name)
476 # We do not pass dtype to ensure that the Series constructor
477 # does inference in the case where `result` has object-dtype.
--> 478 out = left._constructor(result, index=index)
479 out = out.__finalize__(left)
480
C:\anaconda3\envs\py3tf2\lib\site-packages\pandas\core\series.py in __init__(self, data, index, dtype, name, copy, fastpath)
290 if len(index) != len(data):
291 raise ValueError(
--> 292 f"Length of passed values is {len(data)}, "
293 f"index implies {len(index)}."
294 )
ValueError: Length of passed values is 1445, index implies 1.
This should work:
def gredientDescent(inputs, outputs, learning_rate):
    weights = np.zeros((4, 1))
    bias = 0
    for i in range(num_observations):
        print(weights)
        predicted = np.dot(inputs, weights.T) + bias
        deltas = predicted - outputs
        cost = np.sum(deltas ** 2) / num_observations
        dw = np.dot(inputs, deltas.T)
        db = np.sum(deltas)
        weights = weights - (learning_rate * dw)
        bias = bias - (learning_rate * db)
    print(weights)

gredientDescent(inputs, outputs, 0.001)
Note:
predicted = np.dot(inputs, weights) + bias changed to predicted = np.dot(inputs, weights.T) + bias
dw = np.dot(inputs.T, deltas) changed to dw = np.dot(inputs, deltas.T)
as you want the sum of multiplications of each input by its weight, not the sum of weight multiplications by each input.
Hope this helps. Cheers.
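A related point worth checking (an assumption on my part, since the traceback runs through pandas.core.series): if inputs and outputs are pandas objects, converting them to plain NumPy arrays and keeping outputs as a column vector sidesteps the Series index machinery that raises this particular ValueError, even with the original dot products:
X = inputs.to_numpy()                  # (1445, 4), assuming a DataFrame
y = outputs.to_numpy().reshape(-1, 1)  # (1445, 1), assuming a Series
predicted = np.dot(X, weights) + bias  # (1445, 4) . (4, 1) -> (1445, 1)
deltas = predicted - y                 # stays (1445, 1) instead of broadcasting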
Getting an error Unknown label type: (array([0.14999, 0.175 , 0.225 , ..., 4.991 , 5. , 5.00001]),) when trying to fit the dataset.
from sklearn.datasets import fetch_california_housing
from sklearn.datasets import load_iris
cali = fetch_california_housing()
iris = load_iris()
from sklearn.naive_bayes import GaussianNB
gnb = GaussianNB() # probabilistic
y_pred_cali = gnb.fit(cali.data, cali.target).predict(cali.data)
Error:
ValueError Traceback (most recent call last)
<ipython-input-23-71ed3304ef0f> in <module>
14
15 gnb = GaussianNB() # probabilistic
---> 16 y_pred_cali = gnb.fit(cali[0], cali[1]).predict(cali[0])
17
~\Anaconda3\lib\site-packages\sklearn\naive_bayes.py in fit(self, X, y, sample_weight)
189 X, y = check_X_y(X, y)
190 return self._partial_fit(X, y, np.unique(y), _refit=True,
--> 191 sample_weight=sample_weight)
192
193     @staticmethod
~\Anaconda3\lib\site-packages\sklearn\naive_bayes.py in _partial_fit(self, X, y, classes, _refit, sample_weight)
351 self.classes_ = None
352
--> 353 if _check_partial_fit_first_call(self, classes):
354 # This is the first call to partial_fit:
355 # initialize various cumulative counters
~\Anaconda3\lib\site-packages\sklearn\utils\multiclass.py in _check_partial_fit_first_call(clf, classes)
318 else:
319 # This is the first call to partial_fit
--> 320 clf.classes_ = unique_labels(classes)
321 return True
322
~\Anaconda3\lib\site-packages\sklearn\utils\multiclass.py in unique_labels(*ys)
92 _unique_labels = _FN_UNIQUE_LABELS.get(label_type, None)
93 if not _unique_labels:
---> 94 raise ValueError("Unknown label type: %s" % repr(ys))
95
96 ys_labels = set(chain.from_iterable(_unique_labels(y) for y in ys))
ValueError: Unknown label type: (array([0.14999, 0.175 , 0.225 , ..., 4.991 , 5. , 5.00001]),)
This dataset has a continuous target variable.
GaussianNB (GNB) is a classification method, not a regression method: y needs to be discrete class labels, not a continuous variable.
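For instance, the iris target already loaded in the question is discrete, so GaussianNB fits it without complaint, while the continuous California housing target calls for a regressor. A minimal sketch (LinearRegression is just one arbitrary choice of regressor):
from sklearn.linear_model import LinearRegression

y_pred_iris = gnb.fit(iris.data, iris.target).predict(iris.data)   # discrete labels: fine for GaussianNB
reg = LinearRegression()
y_pred_cali = reg.fit(cali.data, cali.target).predict(cali.data)   # continuous target: use regression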
I am using RandomForestClassifier from sklearn.ensemble. It works when I use it without class_weight, but when I add class_weight it gives this error.
lr = RandomForestClassifier(n_estimators=22, criterion='entropy', max_depth=5,
                            class_weight={'Sex': 2.})
lr.fit(X_train.values[:, 1:], Y_train)
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-248-411a1c135d08> in <module>
1 print(X_train)
----> 2 lr.fit(X_train.values[:,1:],Y_train)
/opt/conda/lib/python3.6/site-packages/sklearn/ensemble/forest.py in fit(self, X, y, sample_weight)
273 self.n_outputs_ = y.shape[1]
274
--> 275 y, expanded_class_weight = self._validate_y_class_weight(y)
276
277 if getattr(y, "dtype", None) != DOUBLE or not y.flags.contiguous:
/opt/conda/lib/python3.6/site-packages/sklearn/ensemble/forest.py in _validate_y_class_weight(self, y)
519 class_weight = self.class_weight
520 expanded_class_weight = compute_sample_weight(class_weight,
--> 521 y_original)
522
523 return y, expanded_class_weight
/opt/conda/lib/python3.6/site-packages/sklearn/utils/class_weight.py in compute_sample_weight(class_weight, y, indices)
161 weight_k = compute_class_weight(class_weight_k,
162 classes_full,
--> 163 y_full)
164
165 weight_k = weight_k[np.searchsorted(classes_full, y_full)]
/opt/conda/lib/python3.6/site-packages/sklearn/utils/class_weight.py in compute_class_weight(class_weight, classes, y)
63 i = np.searchsorted(classes, c)
64 if i >= len(classes) or classes[i] != c:
---> 65 raise ValueError("Class label {} not present.".format(c))
66 else:
67 weight[i] = class_weight[c]
ValueError: Class label Sex not present.
This is my X_train:
PassengerId Pclass Sex ... Ticket Fare Embarked
How many classes do you have in Y_train?
class_weight refers to Y_train, i.e. the labels.
Example:
class_weight={0: 1, 1: 2}
means weight 1 for class 0 and weight 2 for class 1.
Using class_weight={'Sex': 2.} is wrong: 'Sex' is a column of X_train, not a class label.
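Assuming Y_train holds 0/1 labels (a guess based on the Titanic-style columns shown), a valid call would look like:
lr = RandomForestClassifier(n_estimators=22, criterion='entropy', max_depth=5,
                            class_weight={0: 1., 1: 2.})  # keys must be labels present in Y_train
lr.fit(X_train.values[:, 1:], Y_train)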
I have been trying to use fastai with a custom torch model. My code is as follows:
X_train = np.load(dirpath + 'X_train.npy')
X_valid = np.load(dirpath + 'X_valid.npy')
Y_train = np.load(dirpath + 'Y_train.npy')
Y_valid = np.load(dirpath + 'Y_valid.npy')
X_train's shape is : (240, 122, 96),
and Y_train's shape is : (240,1)
Then I convert these to torch tensors:
# Converting data to torch tensors
def to_torch_data(x, np_type, tch_type):
    return torch.from_numpy(x.astype(np_type)).to(tch_type)

X_train = to_torch_data(X_train, float, torch.float32)
X_valid = to_torch_data(X_valid, float, torch.float32)
Y_train = to_torch_data(Y_train, float, torch.float32)
Y_valid = to_torch_data(Y_valid, float, torch.float32)
Creating torch TensorDatasets for the fastai DataBunch wrapper:
# Creating torch tensor datasets so that data can be used
# on ImageDataBunch function for fastai
train_ds = tdatautils.TensorDataset(X_train,Y_train)
valid_ds = tdatautils.TensorDataset(X_valid,Y_valid)
# Creating DataBunch object to be used as data in fastai methods.
batch_size = 24
my_data_bunch = DataBunch.create(train_ds,valid_ds,bs=batch_size)
And this is my custom torch model:
# Creating corresponding torch model
import torch.nn.functional as F

class Net(nn.Module):
    def __init__(self, droprate=0, activationF=None):
        super(Net, self).__init__()
        self.lstm_0 = nn.LSTM(96, 720)
        self.activation_0 = nn.ELU()
        self.dropout_0 = nn.Dropout(p=droprate)
        self.lstm_1 = nn.LSTM(720, 480)
        self.activation_1 = nn.ELU()
        self.batch_norm_1 = nn.BatchNorm1d(122)
        self.fc_2 = nn.Linear(480, 128)
        self.dropout_2 = nn.Dropout(p=droprate)
        self.last = nn.Linear(128, 1)
        self.last_act = nn.ReLU()

    def forward(self, x):
        out, hid1 = self.lstm_0(x)
        out = self.dropout_0(self.activation_0(out))
        out, hid2 = self.lstm_1(out)
        out = out[:, -1, :]
        out = self.batch_norm_1(self.activation_1(out))
        out = self.dropout_2(self.fc_2(out))
        out = self.last_act(self.last(out))
        return out

# create instance of model
net = Net(droprate=train_droprate, activationF=train_activation)  # .cuda()
print(net)
After all this, I run the learner's lr_find method and get this error:
Empty Traceback (most recent call last)
C:\Anaconda3\envs\fastai\lib\site-packages\torch\utils\data\dataloader.py in _try_get_batch(self, timeout)
510 try:
--> 511 data = self.data_queue.get(timeout=timeout)
512 return (True, data)
C:\Anaconda3\envs\fastai\lib\queue.py in get(self, block, timeout)
171 if remaining <= 0.0:
--> 172 raise Empty
173 self.not_empty.wait(remaining)
Empty:
During handling of the above exception, another exception occurred:
RuntimeError Traceback (most recent call last)
<ipython-input-35-e4b7603c0a82> in <module>
----> 1 my_learner.lr_find()
~\Desktop\fastai\fastai\fastai\train.py in lr_find(learn, start_lr, end_lr, num_it, stop_div, wd)
30 cb = LRFinder(learn, start_lr, end_lr, num_it, stop_div)
31 epochs = int(np.ceil(num_it/len(learn.data.train_dl)))
---> 32 learn.fit(epochs, start_lr, callbacks=[cb], wd=wd)
33
34 def to_fp16(learn:Learner, loss_scale:float=None, max_noskip:int=1000, dynamic:bool=True, clip:float=None,
~\Desktop\fastai\fastai\fastai\basic_train.py in fit(self, epochs, lr, wd, callbacks)
197 callbacks = [cb(self) for cb in self.callback_fns + listify(defaults.extra_callback_fns)] + listify(callbacks)
198 if defaults.extra_callbacks is not None: callbacks += defaults.extra_callbacks
--> 199 fit(epochs, self, metrics=self.metrics, callbacks=self.callbacks+callbacks)
200
201 def create_opt(self, lr:Floats, wd:Floats=0.)->None:
~\Desktop\fastai\fastai\fastai\basic_train.py in fit(epochs, learn, callbacks, metrics)
97 cb_handler.set_dl(learn.data.train_dl)
98 cb_handler.on_epoch_begin()
---> 99 for xb,yb in progress_bar(learn.data.train_dl, parent=pbar):
100 xb, yb = cb_handler.on_batch_begin(xb, yb)
101 loss = loss_batch(learn.model, xb, yb, learn.loss_func, learn.opt, cb_handler)
C:\Anaconda3\envs\fastai\lib\site-packages\fastprogress\fastprogress.py in __iter__(self)
70 self.update(0)
71 try:
---> 72 for i,o in enumerate(self._gen):
73 if i >= self.total: break
74 yield o
~\Desktop\fastai\fastai\fastai\basic_data.py in __iter__(self)
73 def __iter__(self):
74 "Process and returns items from `DataLoader`."
---> 75 for b in self.dl: yield self.proc_batch(b)
76
77     @classmethod
C:\Anaconda3\envs\fastai\lib\site-packages\torch\utils\data\dataloader.py in __next__(self)
574 while True:
575 assert (not self.shutdown and self.batches_outstanding > 0)
--> 576 idx, batch = self._get_batch()
577 self.batches_outstanding -= 1
578 if idx != self.rcvd_idx:
C:\Anaconda3\envs\fastai\lib\site-packages\torch\utils\data\dataloader.py in _get_batch(self)
541 elif self.pin_memory:
542 while self.pin_memory_thread.is_alive():
--> 543 success, data = self._try_get_batch()
544 if success:
545 return data
C:\Anaconda3\envs\fastai\lib\site-packages\torch\utils\data\dataloader.py in _try_get_batch(self, timeout)
517 if not all(w.is_alive() for w in self.workers):
518 pids_str = ', '.join(str(w.pid) for w in self.workers if not w.is_alive())
--> 519 raise RuntimeError('DataLoader worker (pid(s) {}) exited unexpectedly'.format(pids_str))
520 if isinstance(e, queue.Empty):
521 return (False, None)
RuntimeError: DataLoader worker (pid(s) 9584, 7236, 5108, 932, 13228, 13992, 4576, 13204) exited unexpectedly
I have researched DataLoader but couldn't find anything useful.
Although I didn't understand the error message you posted, I see one problem in your code.
out = out[:,-1,:] # batch_size x 480
out = self.batch_norm_1(self.activation_1(out))
But you declared batch_norm_1 as:
self.batch_norm_1 = nn.BatchNorm1d(122)
Which should be:
self.batch_norm_1 = nn.BatchNorm1d(480)
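As for the DataLoader worker crash itself, a common debugging step (a suggestion, assuming fastai v1's DataBunch.create passes num_workers through to the underlying torch DataLoader) is to disable worker processes so the real exception surfaces in the main process instead of the generic "worker exited unexpectedly":
my_data_bunch = DataBunch.create(train_ds, valid_ds, bs=batch_size, num_workers=0)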