AttributeError: 'DataParallel' object has no attribute 'copy' - PyTorch

I am trying to resume training the MonkAI PyTorch RetinaNet. I load a saved .pt file instead of building the model from scratch. The changes are in Monk_Object_Detection/5_pytorch_retinanet/lib/train_detector.py; look for the '# change' comments in the places where it is modified.
def Model(self, model_name="resnet18", gpu_devices=[0]):
    '''
    User function: Set Model parameters

    Available Models
        resnet18
        resnet34
        resnet50
        resnet101
        resnet152

    Args:
        model_name (str): Select model from available models
        gpu_devices (list): List of GPU Device IDs to be used in training

    Returns:
        None
    '''
    num_classes = self.system_dict["local"]["dataset_train"].num_classes();

    if model_name == "resnet18":
        retinanet = model.resnet18(num_classes=num_classes, pretrained=True)
    elif model_name == "resnet34":
        retinanet = model.resnet34(num_classes=num_classes, pretrained=True)
    elif model_name == "resnet50":
        # retinanet = model.resnet50(num_classes=num_classes, pretrained=True)
        # change
        retinanet = torch.load('/content/drive/MyDrive/Object_detection_retinanet/trained_retinanet_40.pt')
    elif model_name == "resnet101":
        retinanet = model.resnet101(num_classes=num_classes, pretrained=True)
    elif model_name == "resnet152":
        retinanet = model.resnet152(num_classes=num_classes, pretrained=True)

    if self.system_dict["params"]["use_gpu"]:
        self.system_dict["params"]["gpu_devices"] = gpu_devices
        if len(self.system_dict["params"]["gpu_devices"]) == 1:
            os.environ["CUDA_VISIBLE_DEVICES"] = str(self.system_dict["params"]["gpu_devices"][0])
        else:
            os.environ["CUDA_VISIBLE_DEVICES"] = ','.join([str(id) for id in self.system_dict["params"]["gpu_devices"]])
        self.system_dict["local"]["device"] = 'cuda' if torch.cuda.is_available() else 'cpu'

        # change - added 3 lines below
        if isinstance(retinanet, torch.nn.DataParallel):
            retinanet = retinanet.module
        retinanet.load_state_dict(torch.load('/content/drive/MyDrive/Object_detection_retinanet/trained_retinanet_40.pt'))

        retinanet = retinanet.to(self.system_dict["local"]["device"])
        retinanet = torch.nn.DataParallel(retinanet).to(self.system_dict["local"]["device"])

    retinanet.training = True
    retinanet.train()
    retinanet.module.freeze_bn()

    self.system_dict["local"]["model"] = retinanet;
I get an AttributeError when I call Model() from the main script, as shown below:
from train_detector import Detector

gtf = Detector()

# Loading the dataset
root_dir = './'
coco_dir = 'coco_dir'
img_dir = 'images'
set_dir = 'train'

gtf.Train_Dataset(root_dir, coco_dir, img_dir, set_dir, batch_size=8, use_gpu=True)
gtf.Model(model_name="resnet50", gpu_devices=[0, 1, 2, 3])
error:
AttributeError Traceback (most recent call last)
<ipython-input-22-1a0c8d446904> in <module>()
3 if PRE_TRAINED:
4 #Initialising Model
----> 5 gtf.Model(model_name="resnet50", gpu_devices=[0, 1, 2, 3])
6 #Setting up hyperparameters
7 gtf.Set_Hyperparams(lr=0.001, val_interval=1, print_interval=20)
2 frames
/content/Monk_Object_Detection/5_pytorch_retinanet/lib/train_detector.py in Model(self, model_name, gpu_devices)
245 if isinstance(retinanet,torch.nn.DataParallel):
246 retinanet = retinanet.module
--> 247 retinanet.load_state_dict(torch.load('/content/drive/MyDrive/Object_detection_retinanet/trained_retinanet_40.pt'))
248
249 retinanet = retinanet.to(self.system_dict["local"]["device"])
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in load_state_dict(self, state_dict, strict)
1453 # copy state_dict so _load_from_state_dict can modify it
1454 metadata = getattr(state_dict, '_metadata', None)
-> 1455 state_dict = state_dict.copy()
1456 if metadata is not None:
1457 # mypy isn't aware that "_metadata" exists in state_dict
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in __getattr__(self, name)
1176 return modules[name]
1177 raise AttributeError("'{}' object has no attribute '{}'".format(
-> 1178 type(self).__name__, name))
1179
1180 def __setattr__(self, name: str, value: Union[Tensor, 'Module']) -> None:
AttributeError: 'DataParallel' object has no attribute 'copy'
Kindly help me with a solution!

I found this by simply googling your problem:
retinanet.load_state_dict(torch.load('filename').module.state_dict())
The link to the discussion is here.
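Building on that one-liner, here is a minimal sketch (my own, not from the linked discussion) of a loading block that works whether the saved .pt file holds a DataParallel-wrapped model, a bare model, or a plain state_dict. The checkpoint path is the one from the question, and model.resnet50 / num_classes are the names already in scope inside Model():

import torch

checkpoint_path = '/content/drive/MyDrive/Object_detection_retinanet/trained_retinanet_40.pt'
checkpoint = torch.load(checkpoint_path, map_location='cpu')

if isinstance(checkpoint, torch.nn.DataParallel):
    state_dict = checkpoint.module.state_dict()   # unwrap DataParallel, then take its weights
elif isinstance(checkpoint, torch.nn.Module):
    state_dict = checkpoint.state_dict()          # a bare model was saved
else:
    state_dict = checkpoint                       # assume a plain state_dict was saved

retinanet = model.resnet50(num_classes=num_classes, pretrained=True)  # rebuild the architecture as in the unmodified Model()
retinanet.load_state_dict(state_dict)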

Related

AttributeError: lower not found in sklearn

I'm trying to build a news classifier using sklearn. I managed to generate the models, but when I run a prediction I get these messages:
AttributeError Traceback (most recent call last)
Cell In [131], line 3
1 if isinstance(new_text, str):
2 new_text_tfidf = tfidf_vectorizer.transform([new_text])
----> 3 predicted_category = dt.predict(new_text_tfidf)[0]
4 else:
5 predicted_category = "Invalid input, please provide a string"
File ~\AppData\Roaming\Python\Python39\site-packages\sklearn\pipeline.py:457, in Pipeline.predict(self, X, **predict_params)
455 Xt = X
456 for _, name, transform in self._iter(with_final=False):
--> 457 Xt = transform.transform(Xt)
458 return self.steps[-1][1].predict(Xt, **predict_params)
File ~\AppData\Roaming\Python\Python39\site-packages\sklearn\feature_extraction\text.py:2103, in TfidfVectorizer.transform(self, raw_documents)
2086 """Transform documents to document-term matrix.
2087
2088 Uses the vocabulary and document frequencies (df) learned by fit (or
(...)
2099 Tf-idf-weighted document-term matrix.
2100 """
2101 check_is_fitted(self, msg="The TF-IDF vectorizer is not fitted")
-> 2103 X = super().transform(raw_documents)
2104 return self._tfidf.transform(X, copy=False)
File ~\AppData\Roaming\Python\Python39\site-packages\sklearn\feature_extraction\text.py:1387, in CountVectorizer.transform(self, raw_documents)
1384 self._check_vocabulary()
1386 # use the same matrix-building strategy as fit_transform
-> 1387 _, X = self._count_vocab(raw_documents, fixed_vocab=True)
1388 if self.binary:
1389 X.data.fill(1)
File ~\AppData\Roaming\Python\Python39\site-packages\sklearn\feature_extraction\text.py:1209, in CountVectorizer._count_vocab(self, raw_documents, fixed_vocab)
1207 for doc in raw_documents:
1208 feature_counter = {}
-> 1209 for feature in analyze(doc):
1210 try:
1211 feature_idx = vocabulary[feature]
File ~\AppData\Roaming\Python\Python39\site-packages\sklearn\feature_extraction\text.py:111, in _analyze(doc, analyzer, tokenizer, ngrams, preprocessor, decoder, stop_words)
109 else:
110 if preprocessor is not None:
--> 111 doc = preprocessor(doc)
112 if tokenizer is not None:
113 doc = tokenizer(doc)
File ~\AppData\Roaming\Python\Python39\site-packages\sklearn\feature_extraction\text.py:69, in _preprocess(doc, accent_function, lower)
50 """Chain together an optional series of text preprocessing steps to
51 apply to a document.
52 (...)
66 preprocessed string
67 """
68 if lower:
---> 69 doc = doc.lower()
70 if accent_function is not None:
71 doc = accent_function(doc)
File ~\AppData\Roaming\Python\Python39\site-packages\scipy\sparse\_base.py:771, in spmatrix.__getattr__(self, attr)
769 return self.getnnz()
770 else:
--> 771 raise AttributeError(attr + " not found")
AttributeError: lower not found
Below are some pieces of code from my notebook:
The preprocess_text(s) method:
def preprocess_text(s):
    """A text processing pipeline for cleaning up text using the hero package."""
    s = s.replace("<br/>", "")
    s = s.replace("’", "")
    s = s.replace("‘", "")
    s = hero.fillna(s)
    s = hero.lowercase(s)
    s = hero.remove_digits(s)
    s = hero.remove_punctuation(s)
    s = hero.remove_diacritics(s)
    s = hero.remove_whitespace(s)
    s = s.replace("Ë","E").replace("ë","e").replace("Ç","C").replace("ç","c")
    return s
text = dataset['Text']
category = dataset['Category']
print(category)
X_train, X_test, Y_train, Y_test = train_test_split(text,category, test_size = 0.3, random_state = 42,shuffle=True, stratify=category)
# Initialize a TfidfVectorizer object: tfidf_vectorizer
tfidf_vectorizer = TfidfVectorizer(sublinear_tf=False, min_df=2, norm='l2', encoding='latin-1', ngram_range=(1,2))
# Transform the training data: tfidf_train
tfidf_train = tfidf_vectorizer.fit_transform(X_train)
# Transform the test data: tfidf_test
tfidf_test = tfidf_vectorizer.transform(X_test)
Train model with Random Forest algorithm:
#Random Forest Classifier
rfc = Pipeline([('tfidf', TfidfVectorizer()),
                ('rfc', RandomForestClassifier(n_estimators=100)),
               ])
rfc.fit(X_train, Y_train)
test_predict = rfc.predict(X_test)
train_accuracy = round(rfc.score(X_train,Y_train)*100)
test_accuracy =round(accuracy_score(test_predict, Y_test)*100)
print("RandomForestClassifier Train Accuracy Score : {}% ".format(train_accuracy ))
print("RandomForestClassifier Test Accuracy Score : {}% ".format(test_accuracy ))
print()
print(classification_report(test_predict, Y_test, target_names=target_category))
import pickle

with open('model/random_fin.pkl', 'wb') as file:
    pickle.dump(rfc, file)

with open('model/tfidf_vectorizer.pkl', 'wb') as file:
    pickle.dump(rfc.named_steps['tfidf'], file)
new_text = "Berisha ka akuzuar Ramen ne lidhje me aferen e inceneratoreve"
if isinstance(new_text, str):
    new_text_tfidf = tfidf_vectorizer.transform([new_text])
    predicted_category = dt.predict(new_text_tfidf)[0]
else:
    predicted_category = "Invalid input, please provide a string"
print(predicted_category)
I've been trying to resolve this issue, but so far without success...
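For reference, the traceback shows the vectorizer inside the pipeline calling .lower() on a scipy sparse matrix. Below is a minimal sketch of that distinction, assuming dt is a Pipeline like rfc above with its own TfidfVectorizer as the first step (an assumption on my part, since dt is not shown in the post):

# dt is assumed to be a fitted Pipeline([('tfidf', TfidfVectorizer()), ...]) trained on raw strings
new_text = "Berisha ka akuzuar Ramen ne lidhje me aferen e inceneratoreve"

# The pipeline vectorizes internally, so it must receive raw strings:
predicted_category = dt.predict([new_text])[0]

# Passing an already-vectorized sparse matrix makes the pipeline's internal
# vectorizer call .lower() on the matrix, which raises "lower not found":
# dt.predict(tfidf_vectorizer.transform([new_text]))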

HuggingFace | PipelineException: No mask_token (<mask>) found on the input

Goal: loop over multiple models in a for loop and print() the elapsed time for each.
Processing one model works fine:
i=0
start = time.time()
unmasker = pipeline('fill-mask', model=models[i])
unmasker("Hello I'm a [MASK] model.", top_k=1)
end = time.time()
df = df.append({'Model': models[i], 'Time': end-start}, ignore_index=True)
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForMaskedLM: ['cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
However, iterating over many model names causes the titled error.
Code:
from transformers import pipeline
import time
models = ['bert-base-uncased', 'roberta-base', 'distilbert-base-uncased', 'bert-base-cased', 'albert-base-v2', 'roberta-large', 'bert-large-uncased albert-large-v2', 'albert-base-v2', 'bert-large-cased', 'albert-base-v1', 'bert-large-cased-whole-word-masking', 'bert-large-uncased-whole-word-masking', 'albert-xxlarge-v2', 'google/bigbird-roberta-large', 'albert-xlarge-v2', 'albert-xxlarge-v1', 'facebook/muppet-roberta-large', 'facebook/muppet-roberta-base', 'albert-large-v1', 'albert-xlarge-v1']
for _model in models:
    start = time.time()
    unmasker = pipeline('fill-mask', model=_model)
    unmasker("Hello I'm a [MASK] model.", top_k=1) # default: top_k=5
    end = time.time()
    print(end-start)
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForMaskedLM: ['cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
---------------------------------------------------------------------------
PipelineException Traceback (most recent call last)
<ipython-input-19-13b5f651657e> in <module>
3 start = time.time()
4 unmasker = pipeline('fill-mask', model=_model)
----> 5 unmasker("Hello I'm a [MASK] model.", top_k=1) # default: top_k=5
6 end = time.time()
7
~/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/transformers/pipelines/fill_mask.py in __call__(self, inputs, *args, **kwargs)
224 - **token** (`str`) -- The predicted token (to replace the masked one).
225 """
--> 226 outputs = super().__call__(inputs, **kwargs)
227 if isinstance(inputs, list) and len(inputs) == 1:
228 return outputs[0]
~/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/transformers/pipelines/base.py in __call__(self, inputs, num_workers, batch_size, *args, **kwargs)
1099 return self.iterate(inputs, preprocess_params, forward_params, postprocess_params)
1100 else:
-> 1101 return self.run_single(inputs, preprocess_params, forward_params, postprocess_params)
1102
1103 def run_multi(self, inputs, preprocess_params, forward_params, postprocess_params):
~/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/transformers/pipelines/base.py in run_single(self, inputs, preprocess_params, forward_params, postprocess_params)
1105
1106 def run_single(self, inputs, preprocess_params, forward_params, postprocess_params):
-> 1107 model_inputs = self.preprocess(inputs, **preprocess_params)
1108 model_outputs = self.forward(model_inputs, **forward_params)
1109 outputs = self.postprocess(model_outputs, **postprocess_params)
~/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/transformers/pipelines/fill_mask.py in preprocess(self, inputs, return_tensors, **preprocess_parameters)
82 return_tensors = self.framework
83 model_inputs = self.tokenizer(inputs, return_tensors=return_tensors)
---> 84 self.ensure_exactly_one_mask_token(model_inputs)
85 return model_inputs
86
~/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/transformers/pipelines/fill_mask.py in ensure_exactly_one_mask_token(self, model_inputs)
76 else:
77 for input_ids in model_inputs["input_ids"]:
---> 78 self._ensure_exactly_one_mask_token(input_ids)
79
80 def preprocess(self, inputs, return_tensors=None, **preprocess_parameters) -> Dict[str, GenericTensor]:
~/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/transformers/pipelines/fill_mask.py in _ensure_exactly_one_mask_token(self, input_ids)
67 "fill-mask",
68 self.model.base_model_prefix,
---> 69 f"No mask_token ({self.tokenizer.mask_token}) found on the input",
70 )
71
PipelineException: No mask_token (<mask>) found on the input
Please let me know if there's anything else I can add to the post to clarify.
Only certain models throw that error.
Since I am experimenting with runtimes across arbitrary models, the code below suffices; I was able to run the majority of models successfully.
I applied try/except logic. Note that it is considered bad practice to handle exceptions without naming the specific error in the except clause.
for _model in models:
    for i in range(10):
        start = time.time()
        try:
            unmasker = pipeline('fill-mask', model=_model)
            unmasker("Hello I'm a [MASK] model.", top_k=1) # default: top_k=5
            print(_model)
        except:
            continue
        end = time.time()
        df = df.append({'Model': _model, 'Time': end-start}, ignore_index=True)

print(df)
df.to_csv('model_performance.csv', index=False)
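As an alternative to swallowing the exception, here is a minimal sketch (my own, not from the original answer) that builds the prompt from each pipeline's own mask token, so models whose tokenizers expect <mask> rather than [MASK] (RoBERTa-style tokenizers) still receive valid input:

from transformers import pipeline
import time

for _model in models:
    start = time.time()
    unmasker = pipeline('fill-mask', model=_model)
    mask = unmasker.tokenizer.mask_token        # e.g. '[MASK]' for BERT, '<mask>' for RoBERTa
    unmasker("Hello I'm a %s model." % mask, top_k=1)
    end = time.time()
    print(_model, end - start)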

PyTorch: "TypeError: Caught TypeError in DataLoader worker process 0."

I am trying to implement a RoBERTa model for sentiment analysis. First, I declared GPReviewDataset to create a PyTorch Dataset.
MAX_LEN = 160

class GPReviewDataset(Dataset):
    def __init__(self, reviews, targets, tokenizer, max_len):
        self.reviews = reviews
        self.targets = targets
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.reviews)

    def __getitem__(self, item):
        review = str(self.reviews[item])
        target = self.targets[item]
        encoding = self.tokenizer.encode_plus(
            review,
            add_special_tokens=True,
            max_length=self.max_len,
            return_token_type_ids=False,
            pad_to_max_length=True,
            return_attention_mask=True,
            return_tensors='pt',
        )
        return {
            'review_text': review,
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'targets': torch.tensor(target, dtype=torch.long)
        }
Next, I implement create_data_loader to create a couple of data loaders. Here’s a helper function to do it:
def create_data_loader(df, tokenizer, max_len, batch_size):
    ds = GPReviewDataset(
        reviews=df.text.to_numpy(),
        targets=df.sentiment.to_numpy(),
        tokenizer=tokenizer,
        max_len=max_len
    )
    return DataLoader(
        ds,
        batch_size=batch_size,
        num_workers=4
    )

BATCH_SIZE = 16
train_data_loader = create_data_loader(df_train, tokenizer, MAX_LEN, BATCH_SIZE)
val_data_loader = create_data_loader(df_val, tokenizer, MAX_LEN, BATCH_SIZE)
test_data_loader = create_data_loader(df_test, tokenizer, MAX_LEN, BATCH_SIZE)
dt = next(iter(train_data_loader))
However, when I run this code, it stops and gives me these errors:
TypeError Traceback (most recent call last)
<ipython-input-35-a673c0794f60> in <module>()
----> 1 dt = next(iter(train_data_loader))
3 frames
/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py in __next__(self)
433 if self._sampler_iter is None:
434 self._reset()
--> 435 data = self._next_data()
436 self._num_yielded += 1
437 if self._dataset_kind == _DatasetKind.Iterable and \
/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py in _next_data(self)
1083 else:
1084 del self._task_info[idx]
-> 1085 return self._process_data(data)
1086
1087 def _try_put_index(self):
/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py in _process_data(self, data)
1109 self._try_put_index()
1110 if isinstance(data, ExceptionWrapper):
-> 1111 data.reraise()
1112 return data
1113
/usr/local/lib/python3.6/dist-packages/torch/_utils.py in reraise(self)
426 # have message field
427 raise self.exc_type(message=msg)
--> 428 raise self.exc_type(msg)
429
430
TypeError: Caught TypeError in DataLoader worker process 0.
Original Traceback (most recent call last):
File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/_utils/worker.py", line 198, in _worker_loop
data = fetcher.fetch(index)
File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/_utils/fetch.py", line 44, in fetch
data = [self.dataset[idx] for idx in possibly_batched_index]
File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/_utils/fetch.py", line 44, in <listcomp>
data = [self.dataset[idx] for idx in possibly_batched_index]
File "<ipython-input-18-1e537ce5a428>", line 25, in __getitem__
'targets': torch.tensor(target, dtype=torch.long)
TypeError: new(): invalid data type 'str'
I do not understand why this happens; can anyone please help me and explain?
You need to define your classes as integers. I assume you are working on a classification problem, but it looks like you have defined your classes as strings. You need to convert the classes from strings to integers. For example, in a new column, represent df.sentiment with 0 where it is "positive" and with 1 where it is "negative".
def to_int_sentiment(label):
    if label == "positive":
        return 0
    elif label == "negative":
        return 1

df['int_sentiment'] = df.sentiment.apply(to_int_sentiment)
Then you should use the df.int_sentiment column instead of df.sentiment, so you have to change the create_data_loader function as follows.
def create_data_loader(df, tokenizer, max_len, batch_size):
    ds = GPReviewDataset(
        reviews=df.text.to_numpy(),
        targets=df.int_sentiment.to_numpy(),
        tokenizer=tokenizer,
        max_len=max_len
    )
    return DataLoader(
        ds,
        batch_size=batch_size,
        num_workers=4
    )
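If there are more than two classes, or you'd rather not hard-code the mapping, an equivalent sketch (my own suggestion, not from the original answer) using pandas.factorize produces the integer codes automatically:

import pandas as pd

# factorize() assigns a stable integer code to each distinct sentiment label
df['int_sentiment'], label_names = pd.factorize(df.sentiment)
print(dict(enumerate(label_names)))  # e.g. {0: 'positive', 1: 'negative'}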

Removing the last 2 layers from a BERT classifier results in a "'tuple' object has no attribute 'dim'" error. Why?

I fine-tuned a Hugging Face transformer using Keras (with ktrain) and then reloaded the model in PyTorch.
I want to access the third to last layer (pre_classifier), so I removed the two last layers:
BERT2 = torch.nn.Sequential(*(list(BERT.children())[:-2]))
Running an encoded sentence through this yields the following error message:
AttributeError Traceback (most recent call last)
<ipython-input-38-640702475573> in <module>
----> 1 ans2=BERT2(torch.tensor([e1]))
2 print (ans2)
C:\ProgramData\Anaconda3\lib\site-packages\torch\nn\modules\module.py in __call__(self, *input, **kwargs)
539 result = self._slow_forward(*input, **kwargs)
540 else:
--> 541 result = self.forward(*input, **kwargs)
542 for hook in self._forward_hooks.values():
543 hook_result = hook(self, input, result)
C:\ProgramData\Anaconda3\lib\site-packages\torch\nn\modules\container.py in forward(self, input)
90 def forward(self, input):
91 for module in self._modules.values():
---> 92 input = module(input)
93 return input
94
C:\ProgramData\Anaconda3\lib\site-packages\torch\nn\modules\module.py in __call__(self, *input, **kwargs)
539 result = self._slow_forward(*input, **kwargs)
540 else:
--> 541 result = self.forward(*input, **kwargs)
542 for hook in self._forward_hooks.values():
543 hook_result = hook(self, input, result)
C:\ProgramData\Anaconda3\lib\site-packages\torch\nn\modules\linear.py in forward(self, input)
85
86 def forward(self, input):
---> 87 return F.linear(input, self.weight, self.bias)
88
89 def extra_repr(self):
C:\ProgramData\Anaconda3\lib\site-packages\torch\nn\functional.py in linear(input, weight, bias)
1366 - Output: :math:`(N, *, out\_features)`
1367 """
-> 1368 if input.dim() == 2 and bias is not None:
1369 # fused op is marginally faster
1370 ret = torch.addmm(bias, input, weight.t())
AttributeError: 'tuple' object has no attribute 'dim'
Meanwhile, deleting the classifier entirely (all three layers)
BERT3 = torch.nn.Sequential(*(list(BERT.children())[:-3]))
yields the expected tensor (inside a size-1 tuple) with the expected shape ([sentence_num, token_num, 768]).
Why does removing two (but not three) layers break the model?
And how can I access the pre_classifier results?
They are not accessible by setting the config with output_hidden_states=True, as this flag returns the hidden states of the BERT transformer stack, not those of the classifier layers downstream of it.
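For illustration (not part of the original post), one way to read the pre_classifier activations without slicing the model at all is a forward hook on that submodule. A minimal sketch, assuming BERT is the loaded DistilBertForSequenceClassification and e1 is the encoded sentence used above:

import torch

captured = {}

def save_pre_classifier(module, inputs, output):
    # store the output of the pre_classifier Linear layer for later inspection
    captured['pre_classifier'] = output.detach()

hook = BERT.pre_classifier.register_forward_hook(save_pre_classifier)
with torch.no_grad():
    BERT(torch.tensor([e1]))  # ordinary forward pass; the hook fires along the way
hook.remove()

print(captured['pre_classifier'].shape)  # [sentence_num, 768]: DistilBERT's pooled [CLS] representation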
--
PS
The code used to initialize the BERT model:
def collect_data_for_FT():
    from sklearn.datasets import fetch_20newsgroups
    train_data = fetch_20newsgroups(subset='train', shuffle=True, random_state=42)
    test_data = fetch_20newsgroups(subset='test', shuffle=True, random_state=42)
    print('size of training set: %s' % (len(train_b['data'])))
    print('size of validation set: %s' % (len(test_b['data'])))
    print('classes: %s' % (train_b.target_names))
    x_train = train_data.data
    y_train = train_data.target
    x_test = test_data.data
    y_test = test_data.target
    return (x_train, y_train, x_test, y_test)

bert_name = 'distilbert-base-uncased'
from transformers import DistilBertForSequenceClassification, AutoConfig, AutoTokenizer
import os

dir_path = os.getcwd()
dir_path = os.path.join(dir_path, 'models')
config = AutoConfig.from_pretrained(bert_name, num_labels=20)  # change model configuration to access hidden values.
try:
    BERT = DistilBertForSequenceClassification.from_pretrained(dir_path, config=config)
    print("Finetuned predictor loaded")
except:
    import tensorflow.keras as keras
    print("No finetuned predictor found.\nTraining.")
    (x_train, y_train, x_test, y_test) = collect_data_for_FT()
    ####
    # prework:
    import ktrain
    from ktrain import text
    t = text.Transformer(bert_name, maxlen=500, classes=train_b.target_names)
    trn = t.preprocess_train(x_train, y_train)
    val = t.preprocess_test(x_test, y_test)
    pre_trained_model = t.get_classifier()
    learner = ktrain.get_learner(pre_trained_model, train_data=trn, val_data=val, batch_size=6)
    ####
    ####
    # Find best learning rate
    learner.lr_find()
    learner.lr_plot()
    ####
    learner.fit_onecycle(2e-4, 4)  # chosen based on the learning rate/loss plot.
    ####
    # prepare and save:
    predictor = ktrain.get_predictor(learner.model, preproc=t)
    predictor.save('my_distilbertbase_predictor')
    predictor.model.save_pretrained(dir_path)
    ####
    BERT = DistilBertForSequenceClassification.from_pretrained(os.path.join(dir_path), from_tf=True, config=config)  # re-load tensorflow to pytorch
    BERT.save_pretrained(dir_path)  # save as a "full blooded" pytorch model
    BERT = DistilBertForSequenceClassification.from_pretrained(dir_path, config=config)  # re-load
    from tensorflow.keras import backend as K
    K.clear_session()  # loading from tensorflow takes up space and the GPU. This releases it.

pytorch model.cuda() runtime error

I'm building a text classifier using PyTorch, and I got into some trouble with the .cuda() method. I know that .cuda() moves all parameters to the GPU so that training can be faster. However, an error occurred in the .cuda() call, like this:
start_time = time.time()
for model_type in ('lstm',):
    hyperparam_combinations = score_util.all_combination(hyperparam_dict[model_type].values())
    # for selecting best scoring model

    for test_idx, setting in enumerate(hyperparam_combinations):
        args = custom_dataset.list_to_args(setting, model_type=model_type)
        print(args)
        tsv = "test %d\ttrain_loss\ttrain_acc\ttrain_auc\tval_loss\tval_acc\tval_auc\n" % (test_idx)  # tsv record
        avg_score = []  # cv_mean score

        ### 4 fold cross validation
        for cv_num, (train_iter, val_iter) in enumerate(cv_splits):
            ### model initiation
            model = model_dict[model_type](args)

            if args.emb_type is not None:  # word embedding init
                emb = emb_dict[args.emb_type]
                emb = score_util.embedding_init(emb, tr_text_field, args.emb_type)
                model.embed.weight.data.copy_(emb)

            model.cuda()
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-20-ff6cfce73c10> in <module>()
23 model.embed.weight.data.copy_(emb)
24
---> 25 model.cuda()
26
27 optimizer= torch.optim.Adam(model.parameters(),lr=args.lr)
~\Anaconda3\lib\site-packages\torch\nn\modules\module.py in cuda(self, device_id)
145 copied to that device
146 """
--> 147 return self._apply(lambda t: t.cuda(device_id))
148
149 def cpu(self, device_id=None):
~\Anaconda3\lib\site-packages\torch\nn\modules\module.py in _apply(self, fn)
116 def _apply(self, fn):
117 for module in self.children():
--> 118 module._apply(fn)
119
120 for param in self._parameters.values():
~\Anaconda3\lib\site-packages\torch\nn\modules\module.py in _apply(self, fn)
122 # Variables stored in modules are graph leaves, and we don't
123 # want to create copy nodes, so we have to unpack the data.
--> 124 param.data = fn(param.data)
125 if param._grad is not None:
126 param._grad.data = fn(param._grad.data)
RuntimeError: Variable data has to be a tensor, but got torch.cuda.FloatTensor
This is the error traceback, and I can't see why it happens.
The code worked very well before I set the epoch parameter to 1 to run some tests. I set epoch back to 1000, but the problem lingers on.
Aren't torch.cuda.FloatTensor objects also Tensors? Any help would be much appreciated.
My model looks like this:
class TR_LSTM(nn.Module):
    def __init__(self, args,
                 use_hidden_average=False,
                 pretrained_emb=None):
        super(TR_LSTM, self).__init__()
        # arguments
        self.emb_dim = args.embed_dim
        self.emb_num = args.embed_num
        self.num_hidden_unit = args.hidden_state_dim
        self.num_lstm_layer = args.num_lstm_layer
        self.use_hidden_average = use_hidden_average
        self.batch_size = args.batch_size

        # layers
        self.embed = nn.Embedding(self.emb_num, self.emb_dim)
        if pretrained_emb is not None:
            self.embed.weight.data.copy_(pretrained_emb)
        self.lstm_layer = nn.LSTM(self.emb_dim, self.num_hidden_unit, self.num_lstm_layer, batch_first=True)
        self.fc_layer = nn.Sequential(nn.Linear(self.num_hidden_unit, self.num_hidden_unit),
                                      nn.Linear(self.num_hidden_unit, 2))

    def forward(self, x):
        x = self.embed(x)  # batch * max_seq_len * emb_dim
        h_0, c_0 = self.init_hidden(x.size(0))
        x, (_, _) = self.lstm_layer(x, (h_0, c_0))  # batch * seq_len * hidden_unit_num
        if not self.use_hidden_average:
            x = x[:, x.size(1) - 1, :]
            x = x.squeeze(1)
        else:
            x = x.mean(1).squeeze(1)
        x = self.fc_layer(x)
        return x

    def init_hidden(self, batch_size):
        h_0, c_0 = torch.zeros(self.num_lstm_layer, batch_size, self.num_hidden_unit), \
                   torch.zeros(self.num_lstm_layer, batch_size, self.num_hidden_unit)
        h_0, c_0 = h_0.cuda(), c_0.cuda()
        h_0_param, c_0_param = torch.nn.Parameter(h_0), torch.nn.Parameter(c_0)
        return h_0_param, c_0_param
model.cuda() is called inside your training/test loop, which is the problem. As the error message suggests, you repeatedly convert the parameters (tensors) in your model to CUDA, which is not the right way to move a model to the GPU.
The model object should be created and moved to CUDA outside the loop. Only the training/test instances should be converted to CUDA tensors each time you feed the model. I also suggest you read the example code on the PyTorch documentation site.
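A minimal sketch of that restructuring, reusing the names from the question (model_dict, model_type, args, cv_splits) and assuming torchtext-style batches that expose .text and .label (an assumption on my part):

import torch
import torch.nn.functional as F

model = model_dict[model_type](args)  # build the model once...
model.cuda()                          # ...and move it to the GPU once, outside the loops
optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

for cv_num, (train_iter, val_iter) in enumerate(cv_splits):
    for batch in train_iter:
        inputs = batch.text.cuda()    # only the data is moved to the GPU inside the loop
        targets = batch.label.cuda()
        optimizer.zero_grad()
        loss = F.cross_entropy(model(inputs), targets)
        loss.backward()
        optimizer.step()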

Resources