my initial data was like this
My data is a pandas dataframe with columns 'title' and 'label'. I want to make a custom dataset with this. so I made the dataset like below. I'm working on google colab
class newsDataset(torch.utils.data.Dataset):
def __init__(self,train=True,transform=None):
if train:
self.file = ttrain
else:
self.file= ttest
self.text_list = self.file['title'].values.tolist()
self.class_list=self.file['label'].values.tolist()
def __len__(self):
return len(self.text_list)
def __getitem__(self,idx):
label = self.class_list[idx]
text = self.text_list[idx]
if self.transform is not None:
text=self.transform(text)
return label, text
and this is how I call the dataloader
trainset=newsDataset()
train_iter = DataLoader(trainset)
iter(train_iter).next()
and it gives
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-153-9872744bc8a9> in <module>()
----> 1 iter(train_iter).next()
5 frames
/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataset.py in __getattr__(self, attribute_name)
81 return function
82 else:
---> 83 raise AttributeError
84
85 #classmethod
AttributeError:
There was no exact error message. can anybody help me?
Please add the following missing line to your __init__ function:
self.transform = transform
You don't have self.transform attribute so you need to initialize it in __init__ method
Related
I am stuck in the following error in pymc3 and at a loss.
TypeError: float() argument must be a string or a number, not 'FreeRV'
The below is my code. I pick up this code from here. (Sorry, this is Japanese). However it does not work in my environment:
Google Colab, Python: 3.7.13, pymc3: 3.11.4
import numpy as np
import matplotlib
import matplotlib.pylab as plt
%matplotlib inline
from tqdm import tqdm
import pymc3 as pm
# generate time-series data
np.random.seed(0)
y = np.cumsum(np.random.normal(size=100))
# Infer parameters in time-series data
N = len(y)
T = 1000
with pm.Model() as model:
muZero = pm.Normal(name='muZero', mu=0.0, tau=1.0)
sigmaW = pm.InverseGamma(name='sigmaW', alpha=1.0, beta=1.0)
mu = [0]*N
mu[0] = pm.Normal(name='mu0', mu=muZero, tau=1/sigmaW)
for n in range(1, N):
mu[n] = pm.Normal(name='mu'+str(n), mu=mu[n-1], tau=1/sigmaW)
sigmaV = pm.InverseGamma(name='sigmaV', alpha=1.0, beta=1.0)
y_pre = pm.Normal('y_pre', mu=mu, tau=1/sigmaV, observed=y) # I got error here
start = pm.find_MAP()
step = pm.NUTS()
trace = pm.sample(T, step, start=start)
Here is the full discription of my error.
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
/usr/local/lib/python3.7/dist-packages/pymc3/theanof.py in floatX(X)
82 try:
---> 83 return X.astype(theano.config.floatX)
84 except AttributeError:
AttributeError: 'list' object has no attribute 'astype'
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
TypeError: float() argument must be a string or a number, not 'FreeRV'
The above exception was the direct cause of the following exception:
ValueError Traceback (most recent call last)
4 frames
<ipython-input-30-c3709f545993> in <module>()
26
27 sigmaV = pm.InverseGamma(name='sigmaV', alpha=1.0, beta=1.0)
---> 28 y_pre = pm.Normal('y_pre', mu=mu, tau=1/sigmaV, observed=y) # I got error here
29 start = pm.find_MAP()
30 step = pm.NUTS()
/usr/local/lib/python3.7/dist-packages/pymc3/distributions/distribution.py in __new__(cls, name, *args, **kwargs)
119 dist = cls.dist(*args, **kwargs, shape=shape)
120 else:
--> 121 dist = cls.dist(*args, **kwargs)
122 return model.Var(name, dist, data, total_size, dims=dims)
123
/usr/local/lib/python3.7/dist-packages/pymc3/distributions/distribution.py in dist(cls, *args, **kwargs)
128 def dist(cls, *args, **kwargs):
129 dist = object.__new__(cls)
--> 130 dist.__init__(*args, **kwargs)
131 return dist
132
/usr/local/lib/python3.7/dist-packages/pymc3/distributions/continuous.py in __init__(self, mu, sigma, tau, sd, **kwargs)
485 self.tau = tt.as_tensor_variable(tau)
486
--> 487 self.mean = self.median = self.mode = self.mu = mu = tt.as_tensor_variable(floatX(mu))
488 self.variance = 1.0 / self.tau
489
/usr/local/lib/python3.7/dist-packages/pymc3/theanof.py in floatX(X)
84 except AttributeError:
85 # Scalar passed
---> 86 return np.asarray(X, dtype=theano.config.floatX)
87
88
ValueError: setting an array element with a sequence.
I got error when I want to make dataframe after cleaning data! The code is as follows:
data_clean = pd.DataFrame(cleaner_data,columns=['tweet'])
data_clean.head()
and error info :
ValueError Traceback (most recent call last)
<ipython-input-62-1d07a4d30120> in <module>
----> 1 data_clean = pd.DataFrame(cleaner_data,columns=['tweet'])
2 data_clean.head()
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\frame.py in __init__(self, data, index, columns, dtype, copy)
507 )
508 else:
--> 509 raise ValueError("DataFrame constructor not properly called!")
510
511 NDFrame.__init__(self, mgr, fastpath=True)
ValueError: DataFrame constructor not properly called!
I don't know how to solve it. It's said dataframe constructor no properly called.
Do like this:
df_clean = cleaner_data['tweet']
df_clean.head()
DATA SET HERE https://drive.google.com/open?id=1r24rrKWcIpA1x34tPY8olJFMtjzl0IRn
I am trying to convert my time series into type DateTime, so to do that I needed to make all the number eg.(1256,430,7) into same size eg.(1256,0430,0007) for the to_datetime() to work.
So fist I separated the Entity according to their length and added number of zero required, concat the "Series" into one that were seperated.
FIRST ERROR
This error was sorted by using append() in Series. Then I tried to_datetime()
Second Error
I cant figure out what am I doing wrong
I updated my pandas library up to date.
Still the problem remains.
I tried this on Google Colab thinking might be some problem in my pandas lib.
a='0'+arr_time[arr_time.astype(str).str.len()==3].astype(int).astype(str)
b='0'+dep_time[dep_time.astype(str).str.len()==3].astype(int).astype(str)
c='00'+arr_time[arr_time.astype(str).str.len()==2].astype(int).astype(str)
d='00'+dep_time[dep_time.astype(str).str.len()==2].astype(int).astype(str)
e='000'+arr_time[arr_time.astype(str).str.len()==1].astype(int).astype(str)
f='000'+dep_time[dep_time.astype(str).str.len()==1].astype(int).astype(str)
g=arr_time[arr_time.astype(str).str.len()==4].astype(int).astype(str)
h=dep_time[dep_time.astype(str).str.len()==4].astype(int).astype(str)
arr_time=pd.concat([a,c,e,g])
dep_time=pd.concat([b,d,f,h])
'''concat() is then replaced by append() ERROR detail is below
{AttributeError Traceback (most recent call
last)
<ipython-input-20-61e7a2e98b70> in <module>()
----> 1 arr_time=pd.concat([aa,ba,ca,pa])
2 dep_time=pd.concat([ad,bd,cd,pa])
/usr/local/lib/python3.6/dist-packages/pandas/core/generic.py in
__getattr__(self, name)
5065 if
self._info_axis._can_hold_identifiers_and_holds_name(name):
5066 return self[name]
-> 5067 return object.__getattribute__(self, name)
5068
5069 def __setattr__(self, name, value):
AttributeError: 'Series' object has no attribute 'concat'}'''
arr_time=a.append(c).append(e).append(g)
dep_time=b.append(d).append(f).append(h)
datetime=arr_time.to_datetime(format="%H%M")
'''second error BOTH OF THEM LOOK ALIKE
{AttributeError Traceback (most recent call last)
<ipython-input-13-5a63dad5c284> in <module>
----> 1 datetime=arr_time.to_datetime(format="%H%M")
~\AppData\Local\Continuum\anaconda3\lib\site- packages\pandas\core\generic.py in __getattr__(self, name)
5065 if
self._info_axis._can_hold_identifiers_and_holds_name(name):
5066 return self[name]
-> 5067 return object.__getattribute__(self, name)
5068
5069 def __setattr__(self, name, value):
AttributeError: 'Series' object has no attribute 'to_datetime'}'''
I am getting a strange (to my understanding) message when I try to drop a level from a multi-indexed pandas dataframe.
For a reproducible example:
toy.to_json()
'{"["ISRG","EPS_diluted"]":{"2004-12-31":0.33,"2005-01-28":0.33,"2005-03-31":0.25,"2005-04-01":0.25,"2005-04-29":0.25},"["DHR","EPS_diluted"]":{"2004-12-31":0.67,"2005-01-28":0.67,"2005-03-31":0.67,"2005-04-01":0.58,"2005-04-29":0.58},"["BDX","EPS_diluted"]":{"2004-12-31":0.75,"2005-01-28":0.75,"2005-03-31":0.72,"2005-04-01":0.72,"2005-04-29":0.72},"["SYK","EPS_diluted"]":{"2004-12-31":0.4,"2005-01-28":0.4,"2005-03-31":0.42,"2005-04-01":0.42,"2005-04-29":0.42},"["BSX","EPS_diluted"]":{"2004-12-31":0.35,"2005-01-28":0.35,"2005-03-31":0.42,"2005-04-01":0.42,"2005-04-29":0.42},"["BAX","EPS_diluted"]":{"2004-12-31":0.18,"2005-01-28":0.18,"2005-03-31":0.36,"2005-04-01":0.36,"2005-04-29":0.36},"["EW","EPS_diluted"]":{"2004-12-31":0.4,"2005-01-28":0.4,"2005-03-31":0.5,"2005-04-01":0.5,"2005-04-29":0.5},"["MDT","EPS_diluted"]":{"2004-12-31":0.44,"2005-01-28":0.45,"2005-03-31":0.45,"2005-04-01":0.45,"2005-04-29":0.16},"["ABT","EPS_diluted"]":{"2004-12-31":0.63,"2005-01-28":0.63,"2005-03-31":0.53,"2005-04-01":0.53,"2005-04-29":0.53}}'
toy.droplevel(level = 1, axis = 1)
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-33-982eee5ba162> in <module>()
----> 1 toy.droplevel(level = 1, axis = 1)
C:\Program Files (x86)\Microsoft Visual Studio\Shared\Anaconda3_64\lib\site-packages\pandas\core\generic.py in __getattr__(self, name)
4370 if self._info_axis._can_hold_identifiers_and_holds_name(name):
4371 return self[name]
-> 4372 return object.__getattribute__(self, name)
4373
4374 def __setattr__(self, name, value):
AttributeError: 'DataFrame' object has no attribute 'droplevel'
Problem is the use of an older pandas version, because if you check DataFrame.droplevel:
New in version 0.24.0.
The solution is to use MultiIndex.droplevel:
toy.columns = toy.columns.droplevel(level = 1)
I am working with a text in utf-8.
I want to tokenize it and then convert it into a list.
However I get the following error.
import nltk, jieba, re, os
with open('file.txt') as f:
tokenized_text = jieba.cut(f,cut_all=True)
type(tokenized_text)
generator
word_list = list(tokenized_text)
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-5-16b25477c71d> in <module>()
----> 1 list(new)
~/anaconda3/lib/python3.6/site-packages/jieba/__init__.py in cut(self, sentence, cut_all, HMM)
280 - HMM: Whether to use the Hidden Markov Model.
281 '''
--> 282 sentence = strdecode(sentence)
283
284 if cut_all:
~/anaconda3/lib/python3.6/site-packages/jieba/_compat.py in strdecode(sentence)
35 if not isinstance(sentence, text_type):
36 try:
---> 37 sentence = sentence.decode('utf-8')
38 except UnicodeDecodeError:
39 sentence = sentence.decode('gbk', 'ignore')
AttributeError: '_io.TextIOWrapper' object has no attribute 'decode'
I understand the problem lies somewhere in the jieba package.
I also tried to change the code into
with open('file.txt') as f:
new = jieba.cut(f,cut_all=False)
but got the same result.
jieba.cut takes a string, not a file. This is explained in the readme.