PyTorch custom Dataset raises NotImplementedError - pytorch

I'm making my own custom dataset in PyTorch and would like to visualize one of its images. However, I think something is wrong in the custom dataset. Please help me.
---------------------------------------------------------------------------
NotImplementedError                       Traceback (most recent call last)
<ipython-input> in <module>()
      1 import matplotlib.pyplot as plt
      2 dat = TrainDataset(transforms.ToTensor())
----> 3 img, label = dat[i]
      4 plt.imshow(img.permute(1, 2, 0))

/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataset.py in __getitem__(self, index)
     31
     32     def __getitem__(self, index) -> T_co:
---> 33         raise NotImplementedError
     34
     35     def __add__(self, other: 'Dataset[T_co]') -> 'ConcatDataset[T_co]':

NotImplementedError:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision
from torch.utils.data import DataLoader
import os
import glob
from torch.utils.data import Dataset
import pandas as pd
from PIL import Image

class TrainDataset(Dataset):
    def __init__(self, transform):
        super().__init__()
        self.data = pd.read_csv('/content/drive/MyDrive/cancer/train_labels.csv')
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        img_name, label = self.data.iloc[idx]
        img = Image.open(f'/content/drive/MyDrive/cancer/test/{img_name}.tif')
        img = self.transform(img)
        return (img, torch.tensor(label).long())
import matplotlib.pyplot as plt
dat = TrainDataset(transforms.ToTensor())
img, label = dat[1]
plt.imshow(img.permute(1, 2, 0))
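The NotImplementedError is raised by the base torch.utils.data.Dataset class, which means indexing is falling through to Dataset.__getitem__ instead of your override. In practice that almost always means __len__ and __getitem__ ended up outside the class body through an indentation slip, or the double underscores around the method names were lost. A quick check, sketched under that assumption:
import torch.utils.data

# If this prints True, the subclass never overrode __getitem__ and the
# base class stub (which just raises NotImplementedError) is being called.
print(TrainDataset.__getitem__ is torch.utils.data.Dataset.__getitem__)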

Related

detectron2 throws NotImplementedError while using a pre-trained model

I'm trying to use a pre-trained detectron2 model. Running the following code raises a NotImplementedError:
import torch
torch.__version__
import torchvision
#torchvision.__version__
!pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/cu102/torch1.7/index.html
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()
import numpy as np
import os, json, cv2, random
import matplotlib.pyplot as plt
%matplotlib inline
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog, DatasetCatalog
from detectron2.structures import BoxMode

cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
predictor = DefaultPredictor(cfg)
And it shows the following error:
---------------------------------------------------------------------------
NotImplementedError                       Traceback (most recent call last)
<ipython-input-27-699e754fc9df> in <module>
----> 1 predictor = DefaultPredictor(cfg)

4 frames
/usr/local/lib/python3.8/dist-packages/iopath/common/file_io.py in _isfile(self, path, **kwargs)
    438             bool: true if the path is a file
    439         """
--> 440         raise NotImplementedError()
    441
    442     def _isdir(self, path: str, **kwargs: Any) -> bool:

NotImplementedError:
I had the same issue and solved it by manually downloading the .pkl file and pointing the cfg.MODEL.WEIGHTS variable at it.
You can try this:
import os
import urllib.request
from pathlib import Path

model_weights_url = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
print(f"Downloading model from {model_weights_url}...")
local_model_weights_path = Path("./temp/downloads/model.pkl")
os.makedirs(local_model_weights_path.parent, exist_ok=True)
urllib.request.urlretrieve(model_weights_url, local_model_weights_path)
cfg.MODEL.WEIGHTS = str(local_model_weights_path)
I had the same issue just today. I manually downloaded R-50.pkl from https://dl.fbaipublicfiles.com/detectron2/ImageNetPretrained/MSRA/R-50.pkl and set cfg.MODEL.WEIGHTS = "R-50.pkl"  # path to the file

RandomAdjustSharpness gives IndexError: tuple index out of range

While using RandomAdjustSharpness, my code throws the following error: IndexError: tuple index out of range. I followed the instructions given here - https://pytorch.org/vision/stable/transforms.html - and am therefore confused by this error.
Here is my code:
import math, random
from sklearn.datasets import load_sample_images
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.autograd as autograd
import torch.nn.functional as F
import torchvision.transforms as transforms
import matplotlib.pyplot as plt

def random_crop(imgs):
    imgs = torch.tensor(imgs)
    change = torch.nn.Sequential(
        transforms.RandomCrop(427),
        transforms.RandomAdjustSharpness(1, p=1)
    )
    imgs = change(imgs).numpy()
    return imgs

### Obtaining a random image and preprocessing it ###
dataset = load_sample_images()
first_img_data = dataset.images[0]
first_img_data = first_img_data.reshape(-1, 427, 640)
first_img_data = first_img_data[1, :, :]
#first_img_data = first_img_data[0:84, 0:84].reshape(-1, 84, 84)
#first_img_data = torch.tensor(first_img_data)
plt.figure()
plt.imshow(np.squeeze(first_img_data))
foo = random_crop(first_img_data)
plt.figure()
plt.imshow(np.squeeze(foo))
plt.show()
You need to add a dimension to your tensor, like this:
torch.tensor([imgs])
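For context on why this works: the slice first_img_data[1, :, :] leaves a 2-D (H, W) array, while the torchvision tensor transforms expect at least a (C, H, W) tensor; the missing channel axis is what produces the tuple index out of range. An equivalent fix inside random_crop, as a sketch:
def random_crop(imgs):
    # unsqueeze(0) adds the channel axis, turning (H, W) into (1, H, W)
    imgs = torch.tensor(imgs).unsqueeze(0)
    change = torch.nn.Sequential(
        transforms.RandomCrop(427),
        transforms.RandomAdjustSharpness(1, p=1)
    )
    return change(imgs).numpy()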

Error 403: Forbidden when trying to load a pre-trained model in Google Colab

Here is my code to load a pre-trained model in a Google Colab notebook:
# Import resources
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
import time
import json
import copy
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import PIL
from PIL import Image
from collections import OrderedDict
import torch
from torch import nn, optim, cuda
from torch.optim import lr_scheduler
from torch.autograd import Variable
import torchvision
from torchvision import datasets, models, transforms
from torch.utils.data.sampler import SubsetRandomSampler
import torch.nn as nn
import torch.nn.functional as F
import os
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
model = models.resnet152(pretrained=True)
num_in_features = 2048
print(model)
I have run this line successfully before; however, now I get the error:
Downloading: "https://download.pytorch.org/models/resnet152-b121ed2d.pth" to /root/.cache/torch/checkpoints/resnet152-b121ed2d.pth
---------------------------------------------------------------------------
HTTPError                                 Traceback (most recent call last)
<ipython-input-13-361fc480b515> in <module>()
----> 1 model = models.resnet152(pretrained=True)
      2 num_in_features = 2048
      3 print(model)

9 frames
/usr/lib/python3.6/urllib/request.py in http_error_default(self, req, fp, code, msg, hdrs)
    648 class HTTPDefaultErrorHandler(BaseHandler):
    649     def http_error_default(self, req, fp, code, msg, hdrs):
--> 650         raise HTTPError(req.full_url, code, msg, hdrs, fp)
    651
    652 class HTTPRedirectHandler(BaseHandler):

HTTPError: HTTP Error 403: Forbidden
I have tried loading the model in various other ways, starting a new notebook, and !kill -9 -1.
I'm new to Google Colab and am not sure what the issue really is here. Thanks in advance!
Looks like this was an issue for lots of people, and it is resolved now!
torch.hub._validate_not_a_forked_repo = lambda a, b, c: True
This worked for me.
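Note that the patch has to run before torchvision requests the weights. A minimal sketch of the whole workaround, assuming the same Colab environment as the question:
import torch
from torchvision import models

# Monkey-patch the torch.hub validation call that was returning 403,
# then download the pretrained weights as usual.
torch.hub._validate_not_a_forked_repo = lambda a, b, c: True
model = models.resnet152(pretrained=True)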

"NameError: name 'train' is not defined" in Jupyter (Python 3)

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

def warn(*args, **kwargs): pass
import warnings
warnings.warn = warn

from sklearn.preprocessing import LabelEncoder
from sklearn.cross_validation import StratifiedShuffleSplit

train = pd.read_csv('..\input\train.csv')
test = pd.read_csv('..\input\test.csv')

def encode(train, test):
    le = LabelEncoder().fit(train.species)
    labels = le.transform(train.species)
    classes = list(le.classes_)
    test_ids = test.id
    train = train.drop(['species', 'id'], axis=1)
    test = test.drop(['id'], axis=1)
    return train, labels, test, test_ids, classes

train, labels, test, test_ids, classes = encode(train, test)
train.head(5)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-10-08166fb1df95> in <module>()
     10     return train, labels, test, test_ids, classes
     11
---> 12 train, labels, test, test_ids, classes = encode(train, test)
     13 train.head(5)

NameError: name 'train' is not defined
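A NameError here usually means the train = pd.read_csv(...) line never executed, most likely because the import above it failed first: sklearn.cross_validation was removed in scikit-learn 0.20, so that import raises and the rest of the cell is skipped. A sketch of the corrected lines, assuming a current scikit-learn:
# sklearn.cross_validation was removed in 0.20; use model_selection instead.
from sklearn.model_selection import StratifiedShuffleSplit

# Forward slashes also avoid '\t' in '..\input\test.csv' being parsed as a tab.
train = pd.read_csv('../input/train.csv')
test = pd.read_csv('../input/test.csv')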

Trying to run sklearn text classification on Apache Spark: "Expected sequence or array-like, got PythonRDD[1] at RDD at PythonRDD.scala:43"

I am trying to run the sklearn SGD classifier on Twitter data which has been manually labelled into two classes, 0 and 1.
I am pretty new to Spark and would like your help on this.
I saw some code online and tried to adapt it to my example, but unfortunately it doesn't seem to work, and I don't know why.
Your help would be greatly appreciated.
import sys
sys.path.append('/home/userName/Downloads/spark-1.2.1/python')
from pyspark import SparkContext
import numpy as np
from sklearn.cross_validation import train_test_split, Bootstrap
from sklearn.datasets import make_classification
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import hamming_loss
from sklearn import cross_validation
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.svm import LinearSVC
from sklearn.linear_model import SGDClassifier
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_extraction.text import HashingVectorizer
from sklearn.multiclass import OneVsRestClassifier
from sklearn import preprocessing
import pandas as pd
from sklearn import metrics
from sklearn.utils import shuffle
from sklearn.linear_model import RidgeClassifier
from sklearn.linear_model import Perceptron
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.naive_bayes import BernoulliNB
from time import time
from sklearn.externals import joblib
import re
from HTMLParser import HTMLParser
from sklearn.grid_search import GridSearchCV
import pickle
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score
%matplotlib inline
def run(sc):
    u_cols = ['CLASS', 'USER_RATING', 'REVIEW_TEXT']
    df = pd.read_csv('/home/userName/Desktop/input_file.csv', header=1, names=u_cols)

    # Cleaning the data
    lenn = len(df['REVIEW_TEXT'])
    tag_remove = re.compile(r'<[^>]+>')
    for i in range(0, lenn):
        # Removing code blocks
        df['REVIEW_TEXT'][i] = re.sub('<code>.*?</code>', '', df['REVIEW_TEXT'][i])
        # Removing html tags
        df['REVIEW_TEXT'][i] = tag_remove.sub('', df['REVIEW_TEXT'][i])

    X_train = df['REVIEW_TEXT']
    y_train = df['CLASS']
    X_train_final = X_train
    y_train_final = y_train

    # Validation set approach
    X_train_final, X_test_final, y_train_final, y_test_final = cross_validation.train_test_split(
        X_train_final, y_train_final, test_size=0.05, random_state=15)

    vectorizer = HashingVectorizer(decode_error='ignore', n_features=2 ** 20,
                                   non_negative=True, stop_words='english', ngram_range=(1, 2))
    X_train_final = vectorizer.transform(X_train_final)
    X_test_final = vectorizer.transform(X_test_final)

    model = SGDClassifier(alpha=1e-05, class_weight=None, epsilon=0.1, eta0=0.0, fit_intercept=True,
                          l1_ratio=0.15, learning_rate='optimal', loss='hinge', n_iter=5, n_jobs=1,
                          penalty='l1', power_t=0.5, random_state=None, shuffle=False, verbose=0,
                          warm_start=False)

    samples = sc.parallelize(Bootstrap(y_train_final.shape[0]))
    vote_tally = samples.map(lambda (index, _):
        model.fit(X[index], y[index]).predict(X_test)
    )
    return accuracy_score(y_test_final, vote_tally)

if __name__ == '__main__':
    print run(SparkContext("local", "Boost"))
I get the following error:
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-1-be25c966218e> in <module>()
    107
    108 if __name__ == '__main__':
--> 109     print run(SparkContext("local", "Boost"))
    110

<ipython-input-1-be25c966218e> in run(sc)
    102     )
    103
--> 104     return accuracy_score(y_test_final, vote_tally)
    105     #print vote_tally.count()
    106     #return vote_tally

/usr/local/lib/python2.7/dist-packages/sklearn/metrics/metrics.pyc in accuracy_score(y_true, y_pred, normalize, sample_weight)
   1295
   1296     # Compute accuracy for each possible representation
-> 1297     y_type, y_true, y_pred = _check_clf_targets(y_true, y_pred)
   1298     if y_type == 'multilabel-indicator':
   1299         score = (y_pred != y_true).sum(axis=1) == 0

/usr/local/lib/python2.7/dist-packages/sklearn/metrics/metrics.pyc in _check_clf_targets(y_true, y_pred)
    107     y_pred : array or indicator matrix
    108     """
--> 109     y_true, y_pred = check_arrays(y_true, y_pred, allow_lists=True)
    110     type_true = type_of_target(y_true)
    111     type_pred = type_of_target(y_pred)

/usr/local/lib/python2.7/dist-packages/sklearn/utils/validation.pyc in check_arrays(*arrays, **options)
    248             checked_arrays.append(array)
    249             continue
--> 250         size = _num_samples(array)
    251
    252         if size != n_samples:

/usr/local/lib/python2.7/dist-packages/sklearn/utils/validation.pyc in _num_samples(x)
    172         x = np.asarray(x)
    173     else:
--> 174         raise TypeError("Expected sequence or array-like, got %r" % x)
    175     return x.shape[0] if hasattr(x, 'shape') else len(x)
    176

TypeError: Expected sequence or array-like, got PythonRDD[1] at RDD at PythonRDD.scala:43
The problem is that sklearn components expect sequence/array-like/sparse data to work on, but in pyspark you are working with RDDs.
We have a library which can help you solve your problem. It's called sparkit-learn.
Give it a try.
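If you would rather stay with plain pyspark, the immediate fix is to materialize the RDD before anything reaches sklearn: accuracy_score needs concrete arrays, so collect the per-bootstrap predictions first. A rough Python 2 sketch under the question's setup (it also fills in the X/y names the original lambda left undefined, assuming they were meant to be the vectorized training data):
samples = sc.parallelize(Bootstrap(y_train_final.shape[0]))

# Fit one model per bootstrap sample on the workers, then bring the
# predictions back to the driver as plain numpy arrays.
predictions = samples.map(lambda (train_index, _):
    model.fit(X_train_final[train_index], y_train_final[train_index]).predict(X_test_final)
).collect()

# Score each bootstrap model locally, now that the data is array-like.
scores = [accuracy_score(y_test_final, pred) for pred in predictions]
print sum(scores) / len(scores)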
