FGSM attack in Foolbox - pytorch

I am using Foolbox 3.3.1 to perform some adversarial attacks on a ResNet-50 network. The code is as follows:
import torch
from torchvision import models
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Pretrained ResNet-50 from torchvision, moved to the selected device.
# NOTE(review): no .eval() call is shown, so the model is not explicitly put in evaluation mode here.
model = models.resnet50(pretrained=True).to(device)
# Per-channel normalization statistics handed to Foolbox as preprocessing.
# NOTE(review): these look like the usual ImageNet mean/std — confirm.
mean = [0.485, 0.456, 0.406]
std=[0.229, 0.224, 0.225]
# axis=-3 is presumably the channel axis of an (N, C, H, W) batch — TODO confirm.
preprocessing = dict(mean=mean, std=std, axis=-3)
# Value range declared for the model inputs.
bounds = (0, 1)
# NOTE(review): `fb` is never imported in this snippet; presumably `import foolbox as fb`.
fmodel = fb.models.PyTorchModel(model, bounds=bounds, preprocessing=preprocessing)
# Fetch 8 sample images and their labels through Foolbox's sample helper.
images, labels = fb.utils.samples(fmodel, dataset='imagenet', batchsize=8)
# Float copy of the labels; it is not referenced again in the code shown.
labels_float = labels.to(torch.float32)
def perform_attack(attack, fmodel, images, labels, predicted_labels_before_attack):
    """Attack a batch with ``attack`` and display every image with its outcome.

    Runs the attack with ``epsilons=0.03``, re-classifies the clipped
    adversarial images with ``fmodel`` and calls ``imshow`` once per image.
    The caption holds the attack name, the true class and the predicted class
    before and after the attack.  An image is marked as a successful attack
    when it was classified correctly beforehand and its prediction afterwards
    differs from the true label.

    Args:
        attack: Attack object, called as
            ``attack(fmodel, images, labels, epsilons=0.03)``.
        fmodel: Model wrapper passed to the attack and used to classify the
            clipped result.
        images: Batch of input images.
        labels: True labels of ``images``; must support ``.cpu().numpy()``.
        predicted_labels_before_attack: Per-image predictions made before the
            attack, iterated in step with ``images``.
    """
    print(f'Performing attack with {type(attack).__name__}...', end='')
    raw, clipped, is_adv = attack(fmodel, images, labels, epsilons=0.03)
    logits_after_attacks = fmodel(clipped)
    # max(dim=1) yields (values, indices); the indices are the predicted classes.
    _, predicted_after = logits_after_attacks.max(dim=1)
    labels_after_attack = predicted_after.cpu().numpy()
    true_labels = labels.cpu().numpy()
    attack_name = type(attack).__name__
    per_image = zip(images, predicted_labels_before_attack, true_labels, labels_after_attack)
    for image, predicted_label_before_attack, label, label_after_attack in per_image:
        fooled = predicted_label_before_attack == label and label != label_after_attack
        label_imshow = attack_name + ('; successful attack' if fooled else '')
        label_imshow += f'\nTrue class: {lab_dict[label]}\nClassified before attack as: {lab_dict[predicted_label_before_attack]}\nClassified after attack as: {lab_dict[label_after_attack]}'
        imshow(image, label_imshow)
# Run every attack in the tuple through perform_attack.
# The tuple and loop header were left unclosed in the post; the closing "):" is
# restored to match the cell shown in the traceback.
for attack in (
    fb.attacks.FGSM(),  # "nll_loss_forward_no_reduce_cuda_kernel_index" not implemented for 'Int'
):
    perform_attack(attack, fmodel, images, labels, predicted_labels_before_attack)
I get the error:
RuntimeError: "nll_loss_forward_no_reduce_cuda_kernel_index" not implemented for 'Int'
with full stack:
Performing attack with LinfFastGradientAttack...
RuntimeError Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_1736/3238714708.py in <module>
28 # fb.attacks.BoundaryAttack(), # very slow
29 ):
---> 30 perform_attack(attack, fmodel, images, labels, predicted_labels_before_attack)
~\AppData\Local\Temp/ipykernel_1736/3978727835.py in perform_attack(attack, fmodel, images, labels, predicted_labels_before_attack)
1 def perform_attack(attack, fmodel, images, labels, predicted_labels_before_attack):
2 print(f'Performing attack with {type(attack).__name__}...', end='')
----> 3 raw, clipped, is_adv = attack(fmodel, images, labels, epsilons=0.03)
4 print('done')
5 logits_after_attacks = fmodel(clipped)
~\anaconda3\envs\adversarial\lib\site-packages\foolbox\attacks\base.py in __call__(***failed resolving arguments***)
277 success = []
278 for epsilon in real_epsilons:
--> 279 xp = self.run(model, x, criterion, epsilon=epsilon, **kwargs)
281 # clip to epsilon because we don't really know what the attack returns;
~\anaconda3\envs\adversarial\lib\site-packages\foolbox\attacks\fast_gradient_method.py in run(self, model, inputs, criterion, epsilon, **kwargs)
90 raise ValueError("unsupported criterion")
---> 92 return super().run(
93 model=model, inputs=inputs, criterion=criterion, epsilon=epsilon, **kwargs
94 )
~\anaconda3\envs\adversarial\lib\site-packages\foolbox\attacks\gradient_descent_base.py in run(***failed resolving arguments***)
91 for _ in range(self.steps):
---> 92 _, gradients = self.value_and_grad(loss_fn, x)
93 gradients = self.normalize(gradients, x=x, bounds=model.bounds)
94 x = x + gradient_step_sign * stepsize * gradients
~\anaconda3\envs\adversarial\lib\site-packages\foolbox\attacks\gradient_descent_base.py in value_and_grad(self, loss_fn, x)
50 x: ep.Tensor,
51 ) -> Tuple[ep.Tensor, ep.Tensor]:
---> 52 return ep.value_and_grad(loss_fn, x)
54 def run(
~\anaconda3\envs\adversarial\lib\site-packages\eagerpy\framework.py in value_and_grad(f, t, *args, **kwargs)
350 f: Callable[..., TensorType], t: TensorType, *args: Any, **kwargs: Any
351 ) -> Tuple[TensorType, TensorType]:
--> 352 return t.value_and_grad(f, *args, **kwargs)
~\anaconda3\envs\adversarial\lib\site-packages\eagerpy\tensor\tensor.py in value_and_grad(self, f, *args, **kwargs)
541 self: TensorType, f: Callable[..., TensorType], *args: Any, **kwargs: Any
542 ) -> Tuple[TensorType, TensorType]:
--> 543 return self._value_and_grad_fn(f, has_aux=False)(self, *args, **kwargs)
545 #final
~\anaconda3\envs\adversarial\lib\site-packages\eagerpy\tensor\pytorch.py in value_and_grad(x, *args, **kwargs)
493 loss, aux = f(x, *args, **kwargs)
494 else:
--> 495 loss = f(x, *args, **kwargs)
496 loss = loss.raw
497 loss.backward()
~\anaconda3\envs\adversarial\lib\site-packages\foolbox\attacks\gradient_descent_base.py in loss_fn(inputs)
40 def loss_fn(inputs: ep.Tensor) -> ep.Tensor:
41 logits = model(inputs)
---> 42 return ep.crossentropy(logits, labels).sum()
44 return loss_fn
~\anaconda3\envs\adversarial\lib\site-packages\eagerpy\framework.py in crossentropy(logits, labels)
320 def crossentropy(logits: TensorType, labels: TensorType) -> TensorType:
--> 321 return logits.crossentropy(labels)
~\anaconda3\envs\adversarial\lib\site-packages\eagerpy\tensor\pytorch.py in crossentropy(self, labels)
462 raise ValueError("labels must be 1D and must match the length of logits")
463 return type(self)(
--> 464 torch.nn.functional.cross_entropy(self.raw, labels.raw, reduction="none")
465 )
~\anaconda3\envs\adversarial\lib\site-packages\torch\nn\functional.py in cross_entropy(input, target, weight, size_average, ignore_index, reduce, reduction, label_smoothing)
2844 if size_average is not None or reduce is not None:
2845 reduction = _Reduction.legacy_get_string(size_average, reduce)
-> 2846 return torch._C._nn.cross_entropy_loss(input, target, weight, _Reduction.get_enum(reduction), ignore_index, label_smoothing)
RuntimeError: "nll_loss_forward_no_reduce_cuda_kernel_index" not implemented for 'Int'
Any clue?

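I think the problem is in `perform_attack` (the `3978727835.py` cell in the traceback). The loss is computed with `cross_entropy`, which expects the target labels to be 64-bit integers (`torch.long`), but your labels are `Int`. Try changing the `labels` argument to `labels.to(device).long()` if you use CUDA; otherwise change it to `labels.long()`.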


XGBoost: While using the `eval_set` in .fit causing Error

I'm trying to train a model using XGBoost. The code splits the data using GroupKFold and, for each fold, fits an XGBoost model. Within the fit call, I'm trying to evaluate both the training and validation data to check the errors, and then make predictions on the test set.
I'm running the following code using Xgboost.
# Grouped 4-fold cross-validation: rows sharing the same `year` stay in the same fold.
kf = GroupKFold(n_splits=4)
for trn_idx, test_idx in kf.split(X, groups=X.year):
    x_train, x_valid = X.iloc[trn_idx], X.iloc[test_idx]
    y_train, y_valid = y.iloc[trn_idx], y.iloc[test_idx]
    # The end of this call was cut off in the post; `eval_metric=my_smape` and
    # the closing parenthesis are restored from the traceback.
    xgb_model = xgb.XGBRegressor(
        booster='dart',
        eta=0.1,
        gamma=0,
        colsample_bytree=0.7,
        n_estimators=1200,
        max_depth=1,
        reg_alpha=1.1,
        reg_lambda=1.1,
        subsample=0.03,
        eval_metric=my_smape,
    )
    # Evaluate on both the training and the validation fold after every
    # boosting round.  `verbose=True` and the closing parenthesis are likewise
    # restored from the traceback.
    xgb_model.fit(
        x_train, y_train,
        eval_set=[(x_train, y_train), (x_valid, y_valid)],
        early_stopping_rounds=20,
        verbose=True,
    )
But I'm getting the following error. I checked this doc, and my code is according to the doc. Can someone please help me find the solution?
AttributeError Traceback (most recent call last)
<ipython-input-38-81b11a21472c> in <module>
23 eval_metric=my_smape)
---> 25 xgb_model.fit(x_train, y_train,
26 eval_set=[(x_valid,y_valid)], early_stopping_rounds=20,
27 verbose=True)
D:\Anaconda\lib\site-packages\xgboost\core.py in inner_f(*args, **kwargs)
573 for k, arg in zip(sig.parameters, args):
574 kwargs[k] = arg
--> 575 return f(**kwargs)
577 return inner_f
D:\Anaconda\lib\site-packages\xgboost\sklearn.py in fit(self, X, y, sample_weight, base_margin, eval_set, eval_metric, early_stopping_rounds, verbose, xgb_model, sample_weight_eval_set, base_margin_eval_set, feature_weights, callbacks)
959 xgb_model, eval_metric, params, early_stopping_rounds, callbacks
960 )
--> 961 self._Booster = train(
962 params,
963 train_dmatrix,
D:\Anaconda\lib\site-packages\xgboost\core.py in inner_f(*args, **kwargs)
573 for k, arg in zip(sig.parameters, args):
574 kwargs[k] = arg
--> 575 return f(**kwargs)
577 return inner_f
D:\Anaconda\lib\site-packages\xgboost\training.py in train(params, dtrain, num_boost_round, evals, obj, feval, maximize, early_stopping_rounds, evals_result, verbose_eval, xgb_model, callbacks, custom_metric)
180 break
181 bst.update(dtrain, i, obj)
--> 182 if cb_container.after_iteration(bst, i, dtrain, evals):
183 break
D:\Anaconda\lib\site-packages\xgboost\callback.py in after_iteration(self, model, epoch, dtrain, evals)
237 for _, name in evals:
238 assert name.find('-') == -1, 'Dataset name should not contain `-`'
--> 239 score: str = model.eval_set(evals, epoch, self.metric, self._output_margin)
240 splited = score.split()[1:] # into datasets
241 # split up `test-error:0.1234`
D:\Anaconda\lib\site-packages\xgboost\core.py in eval_set(self, evals, iteration, feval, output_margin)
1860 if feval is not None:
1861 for dmat, evname in evals:
-> 1862 feval_ret = feval(
1863 self.predict(dmat, training=False, output_margin=output_margin), dmat
1864 )
D:\Anaconda\lib\site-packages\xgboost\sklearn.py in inner(y_score, dmatrix)
99 def inner(y_score: np.ndarray, dmatrix: DMatrix) -> Tuple[str, float]:
100 y_true = dmatrix.get_label()
--> 101 return func.__name__, func(y_true, y_score)
102 return inner
AttributeError: '_PredictScorer' object has no attribute '__name__'
It looks like you've run make_scorer() on your custom metric. Try supplying the original function as eval_metric instead, this should fix the issue.

ValueError: Unable to create tensor, you should probably activate padding with 'padding=True'

I am trying to evaluate the facebook/hubert-base-ls960 Hugging Face pre-trained model after fine-tuning it on a private dataset.
I am using the facebook/hubert-base-ls960 pre-trained model with the Wav2Vec2 feature extractor, and the pooling mode is set to mean.
Here's the evaluation code:
# Load the tab-separated test CSV and keep its "test" split.
test_dataset = load_dataset("csv", data_files={"test": "/content/drive/MyDrive/freelancing/test.csv"}, delimiter="\t")["test"]
def speech_file_to_array_fn(batch):
    """Load the audio file at ``batch["path"]`` and store it under ``batch["speech"]``.

    Args:
        batch: A single dataset example with a ``"path"`` entry.

    Returns:
        The same example with a ``"speech"`` entry added.
    """
    waveform, source_rate = torchaudio.load(batch["path"])
    to_target_rate = torchaudio.transforms.Resample(source_rate, target_sampling_rate)
    resampled = to_target_rate(waveform).squeeze().numpy()
    # NOTE(review): `resampled` is computed but never used; what gets stored is
    # the raw tensor returned by torchaudio.load, exactly as in the posted code.
    batch["speech"] = waveform
    return batch
def predict(batch):
    """Classify a batch of examples and store the predicted class ids.

    Args:
        batch: Batched examples whose ``"speech"`` entry holds the audio.

    Returns:
        The same batch with a ``"predicted"`` entry: the argmax over the last
        axis of the model's logits, as a NumPy array.
    """
    features = feature_extractor(
        batch["speech"],
        sampling_rate=feature_extractor.sampling_rate,
        return_tensors="pt",
        padding=True,
    )
    input_values = features.input_values.to(device)
    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        logits = model(input_values).logits
    batch["predicted"] = logits.argmax(dim=-1).detach().cpu().numpy()
    return batch
# Attach the loaded audio to every example.
test_dataset = test_dataset.map(speech_file_to_array_fn)
# Run prediction over the dataset in batches of 2 examples.
result = test_dataset.map(predict, batched=True, batch_size=2)
On the last line of code, I encounter the following error block:
ValueError Traceback (most recent call last)
/usr/local/lib/python3.7/dist-packages/transformers/feature_extraction_utils.py in convert_to_tensors(self, tensor_type)
168 if not is_tensor(value):
--> 169 tensor = as_tensor(value)
ValueError: could not broadcast input array from shape (2,220683) into shape (2,)
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
12 frames
<ipython-input-73-7bd88adad349> in <module>()
----> 1 result = test_dataset.map(predict, batched=True, batch_size=2)
/usr/local/lib/python3.7/dist-packages/datasets/arrow_dataset.py in map(self, function, with_indices, with_rank, input_columns, batched, batch_size, drop_last_batch, remove_columns, keep_in_memory, load_from_cache_file, cache_file_name, writer_batch_size, features, disable_nullable, fn_kwargs, num_proc, suffix_template, new_fingerprint, desc)
1970 new_fingerprint=new_fingerprint,
1971 disable_tqdm=disable_tqdm,
-> 1972 desc=desc,
1973 )
1974 else:
/usr/local/lib/python3.7/dist-packages/datasets/arrow_dataset.py in wrapper(*args, **kwargs)
517 self: "Dataset" = kwargs.pop("self")
518 # apply actual function
--> 519 out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs)
520 datasets: List["Dataset"] = list(out.values()) if isinstance(out, dict) else [out]
521 for dataset in datasets:
/usr/local/lib/python3.7/dist-packages/datasets/arrow_dataset.py in wrapper(*args, **kwargs)
484 }
485 # apply actual function
--> 486 out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs)
487 datasets: List["Dataset"] = list(out.values()) if isinstance(out, dict) else [out]
488 # re-apply format to the output
/usr/local/lib/python3.7/dist-packages/datasets/fingerprint.py in wrapper(*args, **kwargs)
456 # Call actual function
--> 458 out = func(self, *args, **kwargs)
460 # Update fingerprint of in-place transforms + update in-place history of transforms
/usr/local/lib/python3.7/dist-packages/datasets/arrow_dataset.py in _map_single(self, function, with_indices, with_rank, input_columns, batched, batch_size, drop_last_batch, remove_columns, keep_in_memory, load_from_cache_file, cache_file_name, writer_batch_size, features, disable_nullable, fn_kwargs, new_fingerprint, rank, offset, disable_tqdm, desc, cache_only)
2340 indices,
2341 check_same_num_examples=len(input_dataset.list_indexes()) > 0,
-> 2342 offset=offset,
2343 )
2344 except NumExamplesMismatchError:
/usr/local/lib/python3.7/dist-packages/datasets/arrow_dataset.py in apply_function_on_filtered_inputs(inputs, indices, check_same_num_examples, offset)
2217 if with_rank:
2218 additional_args += (rank,)
-> 2219 processed_inputs = function(*fn_args, *additional_args, **fn_kwargs)
2220 if update_data is None:
2221 # Check if the function returns updated examples
/usr/local/lib/python3.7/dist-packages/datasets/arrow_dataset.py in decorated(item, *args, **kwargs)
1912 )
1913 # Use the LazyDict internally, while mapping the function
-> 1914 result = f(decorated_item, *args, **kwargs)
1915 # Return a standard dict
1916 return result.data if isinstance(result, LazyDict) else result
<ipython-input-71-6f845da29c00> in predict(batch)
12 def predict(batch):
---> 13 features = feature_extractor(batch["speech"], sampling_rate=feature_extractor.sampling_rate, return_tensors="pt", padding=True)
15 input_values = features.input_values.to(device)
/usr/local/lib/python3.7/dist-packages/transformers/models/wav2vec2/feature_extraction_wav2vec2.py in __call__(self, raw_speech, padding, max_length, truncation, pad_to_multiple_of, return_attention_mask, return_tensors, sampling_rate, **kwargs)
200 truncation=truncation,
201 pad_to_multiple_of=pad_to_multiple_of,
--> 202 return_attention_mask=return_attention_mask,
203 )
/usr/local/lib/python3.7/dist-packages/transformers/feature_extraction_sequence_utils.py in pad(self, processed_features, padding, max_length, truncation, pad_to_multiple_of, return_attention_mask, return_tensors)
230 batch_outputs[key].append(value)
--> 232 return BatchFeature(batch_outputs, tensor_type=return_tensors)
234 def _pad(
/usr/local/lib/python3.7/dist-packages/transformers/feature_extraction_utils.py in __init__(self, data, tensor_type)
78 def __init__(self, data: Optional[Dict[str, Any]] = None, tensor_type: Union[None, str, TensorType] = None):
79 super().__init__(data)
---> 80 self.convert_to_tensors(tensor_type=tensor_type)
82 def __getitem__(self, item: str) -> Union[Any]:
/usr/local/lib/python3.7/dist-packages/transformers/feature_extraction_utils.py in convert_to_tensors(self, tensor_type)
174 raise ValueError("Unable to create tensor returning overflowing values of different lengths. ")
175 raise ValueError(
--> 176 "Unable to create tensor, you should probably activate padding "
177 "with 'padding=True' to have batched tensors with the same length."
178 )
ValueError: Unable to create tensor, you should probably activate padding with 'padding=True' to have batched tensors with the same length.
I am working on Google Colab. These are the environment variables:
%env LC_ALL=C.UTF-8
%env LANG=C.UTF-8
%env TRANSFORMERS_CACHE=/content/cache
%env HF_DATASETS_CACHE=/content/cache
The padding is already activated in the predict function.
Can you please help me fix it?

Skorch RuntimeError: Input type (torch.cuda.ByteTensor) and weight type (torch.cuda.FloatTensor) should be the same

I'm trying to develop an image segmentation model. In the below code I keep hitting a RuntimeError: Input type (torch.cuda.ByteTensor) and weight type (torch.cuda.FloatTensor) should be the same. I'm not sure why as I've tried to load both my data and my UNet model to the GPU using .cuda() (although not the skorch model-- not sure how to do that). I'm using a library for active learning, modAL, which wraps skorch.
from modAL.models import ActiveLearner
import numpy as np
import torch
from torch import nn
from torch import Tensor
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
from skorch.net import NeuralNet
from modAL.models import ActiveLearner
from modAL.uncertainty import classifier_uncertainty, classifier_margin
from modAL.utils.combination import make_linear_combination, make_product
from modAL.utils.selection import multi_argmax
from modAL.uncertainty import uncertainty_sampling
from model import UNet
from skorch.net import NeuralNet
from skorch.helper import predefined_split
from torch.optim import SGD
import cv2
# Map-style dataset: supports len() and integer indexing.
class ImagesDataset(Dataset):
    """Constructs dataset of satellite images + masks"""

    def __init__(self, image_paths):
        # One directory per sample; each holds images/<dirname>.png and mask.png.
        self.image_paths = image_paths

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return the ``(image, mask)`` pair read with ``cv2.imread`` for sample ``idx``."""
        # Tensor indices are converted to plain Python values first.
        idx = idx.tolist() if torch.is_tensor(idx) else idx
        print("idx:", idx)
        sample_dir = self.image_paths[idx]
        stem = Path(sample_dir).name
        img = cv2.imread(sample_dir +"/images/"+ stem +'.png')
        mask = cv2.imread(sample_dir +'/mask.png')
        print("shape of img", img.shape)
        return img, mask
# Wrap the training and validation directories in map-style datasets.
train_ds = ImagesDataset(train_dirs)
val_ds = ImagesDataset(valid_dirs)
# Shuffled loaders: batches of 3 for training, single samples for validation.
train_loader = torch.utils.data.DataLoader(train_ds, batch_size=3, shuffle=True, pin_memory=True)
val_loader = torch.utils.data.DataLoader(val_ds, batch_size=1, shuffle=True, pin_memory=True)
# Intended to make sure the training and validation data are on the GPU.
# NOTE(review): the .cuda() results are only bound to the loop variables and
# are not referenced again in the code shown.
for i, (tr, val) in enumerate(train_loader):
    tr, val = tr.cuda(), val.cuda()
for i, (tr2, val2) in enumerate(val_loader):
    tr2, val2 = tr2.cuda(), val2.cuda()
# Take one batch from each loader and convert it to NumPy arrays; these are
# the arrays later handed to the ActiveLearner.
# NOTE(review): cv2.imread yields height x width x channel arrays, so
# reshape(..., 3, 1024, 1024) presumably reinterprets the buffer instead of
# moving the channel axis — confirm whether a permute was intended.
X, y = next(iter(train_loader))
X_train = np.array(X.reshape(3,3,1024,1024))
y_train = np.array(y.reshape(3,3,1024,1024))
X2, y2 = next(iter(val_loader))
X_test = np.array(X2.reshape(1,3,1024,1024))
y_test = np.array(y2.reshape(1,3,1024,1024))
# Build the U-Net and move it to the GPU when one is available.
module = UNet(pretrained=True)
if torch.cuda.is_available():
    module = module.cuda()
# create the classifier
net = NeuralNet(
# NOTE(review): the NeuralNet arguments and its closing parenthesis are missing
# from the code as posted.
# assemble initial data
n_initial = 1
# Pick n_initial distinct row indices of X_train at random.
initial_idx = np.random.choice(range(len(X_train)), size=n_initial, replace=False)
X_initial = X_train[initial_idx]
y_initial = y_train[initial_idx]
# generate the pool, remove the initial data from the training dataset
X_pool = np.delete(X_train, initial_idx, axis=0)
y_pool = np.delete(y_train, initial_idx, axis=0)
# train the activelearner
# shape of 4D matrix is ('batch', 'channel', 'width', 'height')
learner = ActiveLearner(
    estimator= net,
    X_training=X_initial, y_training=y_initial,
# NOTE(review): the call is cut off here in the post; the traceback shows it is
# closed with ")" right after these arguments.
The full error trace is:
RuntimeError Traceback (most recent call last)
<ipython-input-83-0af6007b6b72> in <module>
8 learner = ActiveLearner(
9 estimator= net,
---> 10 X_training=X_initial, y_training=y_initial,
11 # X_training=X_initial, y_training=y_initial,
12 )
~/.local/lib/python3.7/site-packages/modAL/models/learners.py in __init__(self, estimator, query_strategy, X_training, y_training, bootstrap_init, on_transformed, **fit_kwargs)
80 ) -> None:
81 super().__init__(estimator, query_strategy,
---> 82 X_training, y_training, bootstrap_init, on_transformed, **fit_kwargs)
84 def teach(self, X: modALinput, y: modALinput, bootstrap: bool = False, only_new: bool = False, **fit_kwargs) -> None:
~/.local/lib/python3.7/site-packages/modAL/models/base.py in __init__(self, estimator, query_strategy, X_training, y_training, bootstrap_init, on_transformed, force_all_finite, **fit_kwargs)
70 self.y_training = y_training
71 if X_training is not None:
---> 72 self._fit_to_known(bootstrap=bootstrap_init, **fit_kwargs)
73 self.Xt_training = self.transform_without_estimating(self.X_training) if self.on_transformed else None
~/.local/lib/python3.7/site-packages/modAL/models/base.py in _fit_to_known(self, bootstrap, **fit_kwargs)
160 """
161 if not bootstrap:
--> 162 self.estimator.fit(self.X_training, self.y_training, **fit_kwargs)
163 else:
164 n_instances = self.X_training.shape[0]
~/.local/lib/python3.7/site-packages/skorch/net.py in fit(self, X, y, **fit_params)
901 self.initialize()
--> 903 self.partial_fit(X, y, **fit_params)
904 return self
~/.local/lib/python3.7/site-packages/skorch/net.py in partial_fit(self, X, y, classes, **fit_params)
860 self.notify('on_train_begin', X=X, y=y)
861 try:
--> 862 self.fit_loop(X, y, **fit_params)
863 except KeyboardInterrupt:
864 pass
~/.local/lib/python3.7/site-packages/skorch/net.py in fit_loop(self, X, y, epochs, **fit_params)
775 self.run_single_epoch(dataset_train, training=True, prefix="train",
--> 776 step_fn=self.train_step, **fit_params)
778 if dataset_valid is not None:
~/.local/lib/python3.7/site-packages/skorch/net.py in run_single_epoch(self, dataset, training, prefix, step_fn, **fit_params)
810 yi_res = yi if not is_placeholder_y else None
811 self.notify("on_batch_begin", X=Xi, y=yi_res, training=training)
--> 812 step = step_fn(Xi, yi, **fit_params)
813 self.history.record_batch(prefix + "_loss", step["loss"].item())
814 self.history.record_batch(prefix + "_batch_size", get_len(Xi))
~/.local/lib/python3.7/site-packages/skorch/net.py in train_step(self, Xi, yi, **fit_params)
707 return step['loss']
--> 709 self.optimizer_.step(step_fn)
710 return step_accumulator.get_step()
~/.local/lib/python3.7/site-packages/torch/autograd/grad_mode.py in decorate_context(*args, **kwargs)
24 def decorate_context(*args, **kwargs):
25 with self.__class__():
---> 26 return func(*args, **kwargs)
27 return cast(F, decorate_context)
~/.local/lib/python3.7/site-packages/torch/optim/sgd.py in step(self, closure)
84 if closure is not None:
85 with torch.enable_grad():
---> 86 loss = closure()
88 for group in self.param_groups:
~/.local/lib/python3.7/site-packages/skorch/net.py in step_fn()
703 def step_fn():
704 self.optimizer_.zero_grad()
--> 705 step = self.train_step_single(Xi, yi, **fit_params)
706 step_accumulator.store_step(step)
707 return step['loss']
~/.local/lib/python3.7/site-packages/skorch/net.py in train_step_single(self, Xi, yi, **fit_params)
643 """
644 self.module_.train()
--> 645 y_pred = self.infer(Xi, **fit_params)
646 loss = self.get_loss(y_pred, yi, X=Xi, training=True)
647 loss.backward()
~/.local/lib/python3.7/site-packages/skorch/net.py in infer(self, x, **fit_params)
1046 x_dict = self._merge_x_and_fit_params(x, fit_params)
1047 return self.module_(**x_dict)
-> 1048 return self.module_(x, **fit_params)
1050 def _get_predict_nonlinearity(self):
~/.local/lib/python3.7/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
725 result = self._slow_forward(*input, **kwargs)
726 else:
--> 727 result = self.forward(*input, **kwargs)
728 for hook in itertools.chain(
729 _global_forward_hooks.values(),
~/al/model.py in forward(self, x)
52 def forward(self, x):
---> 53 conv1 = self.conv1(x)
54 conv2 = self.conv2(conv1)
55 conv3 = self.conv3(conv2)
~/.local/lib/python3.7/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
725 result = self._slow_forward(*input, **kwargs)
726 else:
--> 727 result = self.forward(*input, **kwargs)
728 for hook in itertools.chain(
729 _global_forward_hooks.values(),
~/.local/lib/python3.7/site-packages/torch/nn/modules/container.py in forward(self, input)
115 def forward(self, input):
116 for module in self:
--> 117 input = module(input)
118 return input
~/.local/lib/python3.7/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
725 result = self._slow_forward(*input, **kwargs)
726 else:
--> 727 result = self.forward(*input, **kwargs)
728 for hook in itertools.chain(
729 _global_forward_hooks.values(),
~/.local/lib/python3.7/site-packages/torch/nn/modules/conv.py in forward(self, input)
422 def forward(self, input: Tensor) -> Tensor:
--> 423 return self._conv_forward(input, self.weight)
425 class Conv3d(_ConvNd):
~/.local/lib/python3.7/site-packages/torch/nn/modules/conv.py in _conv_forward(self, input, weight)
418 _pair(0), self.dilation, self.groups)
419 return F.conv2d(input, weight, self.bias, self.stride,
--> 420 self.padding, self.dilation, self.groups)
422 def forward(self, input: Tensor) -> Tensor:
RuntimeError: Input type (torch.cuda.ByteTensor) and weight type (torch.cuda.FloatTensor) should be the same
If anyone could help that would be so so so appreciated! I've been really stuck despite searching all over-- casting my UNet model to floats has not helped and I think I've called .cuda() where I'm supposed to.
Specific things I've tried:
RuntimeError: Input type (torch.FloatTensor) and weight type (torch.cuda.FloatTensor) should be the same loading entries of my DataLoader to CUDA
adding pin_memory to my DataLoader
loading my skorch NeuralNetwork to CUDA as stated here Pytorch, INPUT (normal tensor) and WEIGHT (cuda tensor) mismatch (which didn't work because it's not a function in skorch)
Casting my data to float (https://discuss.pytorch.org/t/input-type-torch-cuda-doubletensor-and-weight-type-torch-cuda-floattensor-should-be-the-same/22704)
cv2.imread gives you arrays of dtype np.uint8, which PyTorch converts to byte tensors. A byte tensor cannot be combined with float weights (which is most probably what your model uses).
You need to convert the byte data to float tensors by modifying the dataset:
from pathlib import Path

import torchvision.transforms as transforms


class ImagesDataset(Dataset):
    """Dataset of satellite images and masks, returned as float tensors.

    Each entry of ``image_paths`` is a sample directory that contains
    ``images/<directory name>.png`` and ``mask.png``.
    """

    def __init__(self, image_paths):
        self.image_paths = image_paths
        # ToTensor turns an H x W x C uint8 array into a C x H x W float tensor
        # scaled to [0, 1], which is the dtype the float model weights need.
        self.transform = transforms.Compose([transforms.ToTensor()])

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return the ``(image, mask)`` float tensors of sample ``idx``.

        Raises:
            FileNotFoundError: If the image or the mask cannot be read.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()
        print("idx:", idx)
        sample_dir = self.image_paths[idx]
        img_path = sample_dir + "/images/" + Path(sample_dir).name + '.png'
        mask_path = sample_dir + '/mask.png'
        img, mask = cv2.imread(img_path), cv2.imread(mask_path)
        # cv2.imread returns None instead of raising when a file is missing or
        # unreadable; fail here with the offending path rather than inside the
        # transform.
        if img is None:
            raise FileNotFoundError(f"could not read image: {img_path}")
        if mask is None:
            raise FileNotFoundError(f"could not read mask: {mask_path}")
        img = self.transform(img)
        mask = self.transform(mask)
        print("shape of img", img.shape)
        return img, mask

A simple linear regression model with a DenseVariational layer in Tensorflow-Probability returns: TypeError: 'NoneType' object is not callable

This is an attempt to use TensorFlow Probability, and more specifically its DenseVariational layer, but it fails for some reason. How can I correct the code?
# Toy regression data: 100 points on the line y = x plus Gaussian noise (std 0.3).
x_train = np.linspace(-1, 1, 100)[:, np.newaxis]
y_train = x_train + 0.3*np.random.randn(100)[:, np.newaxis]


def prior(kernel_size, bias_size, dtype = None):
    """Build the prior over all kernel and bias weights of the layer.

    The prior is a fixed multivariate normal with zero mean and unit diagonal
    scale over the ``kernel_size + bias_size`` weights.
    """
    n = kernel_size + bias_size
    prior_model = Sequential([
        # NOTE(review): the wrapper around this lambda and the closing brackets
        # are missing from the post; tfpl.DistributionLambda is assumed — confirm.
        tfpl.DistributionLambda(
            lambda t: tfd.MultivariateNormalDiag(loc = tf.zeros(n) , scale_diag = tf.ones(n))
        )
    ])
    # As posted, the function ends here without a return statement.


def posterior(kernel_size, bias_size, dtype = None):
    """Build the trainable variational posterior over all kernel and bias weights."""
    n = kernel_size + bias_size
    posterior_model = Sequential([
        # Trainable variables, sized by params_size(n): exactly the number of
        # parameters needed to build an n-dimensional MultivariateNormalTriL.
        tfpl.VariableLayer(tfpl.MultivariateNormalTriL.params_size(n) , dtype = dtype),
        # Turns the output of the VariableLayer into an n-dimensional
        # multivariate normal parameterized by a mean and a lower-triangular
        # scale factor, both learned from the data.
        tfpl.MultivariateNormalTriL(n)
    ])
    # NOTE(review): the closing brackets and this return are missing from the
    # post; the traceback shows the posterior being called successfully, so it
    # is presumably returned — confirm.
    return posterior_model


model = Sequential([
    # The layer call itself is missing from the post; DenseVariational is the
    # layer named in the question and in the traceback.
    tfpl.DenseVariational(
        input_shape = (1, ),  # The input is of dimensionality 1, a series
        units = 1,  # A linear regression is represented by a Dense layer with one single unit
        make_prior_fn = prior,  # Function that returns the prior distribution on the weights
        make_posterior_fn = posterior,  # Function that returns the variational approximation of the posterior
        # TensorFlow scales the likelihood loss computed on a mini-batch into an
        # unbiased estimator of the true loss but does not do the same for the
        # KL divergence loss; dividing by the dataset size does that scaling.
        kl_weight = 1/ x_train.shape[0],
        # True uses the closed-form KL divergence and returns an error unless
        # the library provides one; False approximates the KL divergence by
        # sampling.
        kl_use_exact = True
    )
])
TypeError Traceback (most recent call last)
<ipython-input-14-e7cf0bfd5902> in <module>
17 # same for the DL divergence loss. Here we instruct it to do the necessary scaling.
---> 19 kl_use_exact = True # Unless there is a closed form equation for the KL divergence in the library of Tensorflow setting True will return error. By setting False instead
20 # the KL Divergence will be approxiated using Sampling
~\Anaconda3\envs\tf2\lib\site-packages\tensorflow\python\training\tracking\base.py in _method_wrapper(self, *args, **kwargs)
455 self._self_setattr_tracking = False # pylint: disable=protected-access
456 try:
--> 457 result = method(self, *args, **kwargs)
458 finally:
459 self._self_setattr_tracking = previous_value # pylint: disable=protected-access
~\Anaconda3\envs\tf2\lib\site-packages\tensorflow\python\keras\engine\sequential.py in __init__(self, layers, name)
140 layers = [layers]
141 for layer in layers:
--> 142 self.add(layer)
144 #property
~\Anaconda3\envs\tf2\lib\site-packages\tensorflow\python\training\tracking\base.py in _method_wrapper(self, *args, **kwargs)
455 self._self_setattr_tracking = False # pylint: disable=protected-access
456 try:
--> 457 result = method(self, *args, **kwargs)
458 finally:
459 self._self_setattr_tracking = previous_value # pylint: disable=protected-access
~\Anaconda3\envs\tf2\lib\site-packages\tensorflow\python\keras\engine\sequential.py in add(self, layer)
204 # and create the node connecting the current layer
205 # to the input layer we just created.
--> 206 layer(x)
207 set_inputs = True
~\Anaconda3\envs\tf2\lib\site-packages\tensorflow\python\keras\engine\base_layer.py in __call__(self, *args, **kwargs)
924 if _in_functional_construction_mode(self, inputs, args, kwargs, input_list):
925 return self._functional_construction_call(inputs, args, kwargs,
--> 926 input_list)
928 # Maintains info about the `Layer.call` stack.
~\Anaconda3\envs\tf2\lib\site-packages\tensorflow\python\keras\engine\base_layer.py in _functional_construction_call(self, inputs, args, kwargs, input_list)
1115 try:
1116 with ops.enable_auto_cast_variables(self._compute_dtype_object):
-> 1117 outputs = call_fn(cast_inputs, *args, **kwargs)
1119 except errors.OperatorNotAllowedInGraphError as e:
~\Anaconda3\envs\tf2\lib\site-packages\tensorflow\python\autograph\impl\api.py in wrapper(*args, **kwargs)
253 try:
254 with conversion_ctx:
--> 255 return converted_call(f, args, kwargs, options=options)
256 except Exception as e: # pylint:disable=broad-except
257 if hasattr(e, 'ag_error_metadata'):
~\Anaconda3\envs\tf2\lib\site-packages\tensorflow\python\autograph\impl\api.py in converted_call(f, args, kwargs, caller_fn_scope, options)
455 if conversion.is_in_whitelist_cache(f, options):
456 logging.log(2, 'Whitelisted %s: from cache', f)
--> 457 return _call_unconverted(f, args, kwargs, options, False)
459 if ag_ctx.control_status_ctx().status == ag_ctx.Status.DISABLED:
~\Anaconda3\envs\tf2\lib\site-packages\tensorflow\python\autograph\impl\api.py in _call_unconverted(f, args, kwargs, options, update_cache)
338 if kwargs is not None:
--> 339 return f(*args, **kwargs)
340 return f(*args)
~\Anaconda3\envs\tf2\lib\site-packages\tensorflow_probability\python\layers\dense_variational_v2.py in call(self, inputs)
121 q = self._posterior(inputs)
--> 122 r = self._prior(inputs)
123 self.add_loss(self._kl_divergence_fn(q, r))
TypeError: 'NoneType' object is not callable
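Did you forget to return the model? Your `prior` function builds `prior_model` but never returns it, so it returns `None`, and that `None` is what `DenseVariational` later tries to call. Return the model: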
def prior(kernel_size, bias_size, dtype=None):
    """Build and return the prior model for a variational dense layer.

    The returned object is what the layer later invokes on its inputs, so
    this function must actually return it; returning nothing leaves the
    layer holding ``None`` and produces
    ``TypeError: 'NoneType' object is not callable``.

    Args:
        kernel_size: Number of kernel weights.
        bias_size: Number of bias weights.
        dtype: Accepted for signature compatibility; not used in this body.

    Returns:
        A ``tf.keras.Sequential`` model whose single entry maps any input to
        a standard multivariate normal with ``kernel_size + bias_size``
        dimensions (zero mean, unit diagonal scale).
    """
    # The prior covers kernel and bias weights jointly.
    n = kernel_size + bias_size
    # NOTE(review): Sequential normally expects Layer instances; this lambda
    # was presumably wrapped in a DistributionLambda layer in the original
    # code -- confirm against the full source before running.
    prior_model = tf.keras.Sequential([
        lambda t: tfd.MultivariateNormalDiag(loc=tf.zeros(n), scale_diag=tf.ones(n)),
    ])
    return prior_model

Error when using GridSearchCV and RandomizedSearchCV

When attempting to fit my training data using either GridSearchCV or RandomizedSearchCV, I keep getting the following error:
TypeError: no supported conversion for types: (dtype('O'), dtype('O'))
Here's a sample of the relevant code:
from xgboost.sklearn import XGBRegressor as XGR
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV
# Regressor created with its default arguments; the search supplies the tuned values.
xgbRegModel = XGR()
# Candidate values for each hyperparameter explored by the search.
params = {'max_depth':[3, 6, 9], 'learning_rate':[.05, .1, .5], 'n_estimators': [50, 100, 200]}
# Randomized search over `params`, wrapping the regressor above.
rscv = RandomizedSearchCV(xgbRegModel, params)
# X and y are defined outside this snippet.
rscv.fit(X, y)
where X is a (39942, 112577) scipy.sparse.csr.csr_matrix and y is a (39942,) numpy.ndarray.
The dtypes are all either int64 or float64, and I've tried running it both with np.nan values and after filling the np.nan values with 0... (I thought that might be the problem, but no.)
Can anyone tell me what's going on here? It works just fine when I train the model without using GridSearchCV or RandomizedSearchCV.
Any ideas would be appreciated - thanks!
ps - the traceback for the error is really long, but here it is, if it helps..
TypeError Traceback (most recent call last)
<ipython-input-54-63d54d4cd03e> in <module>()
3 xgbRegModel = XGR()
4 rscv = RandomizedSearchCV(xgbRegModel, params)
----> 5 rscv.fit(X, y)
6 rscv.best_model_
~\Anaconda3\lib\site-packages\sklearn\model_selection\_search.py in fit(self, X, y, groups, **fit_params)
636 error_score=self.error_score)
637 for parameters, (train, test) in product(candidate_params,
--> 638 cv.split(X, y, groups)))
640 # if one choose to see train score, "out" will contain train score info
~\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self, iterable)
777 # was dispatched. In particular this covers the edge
778 # case of Parallel used with an exhausted iterator.
--> 779 while self.dispatch_one_batch(iterator):
780 self._iterating = True
781 else:
~\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in dispatch_one_batch(self, iterator)
623 return False
624 else:
--> 625 self._dispatch(tasks)
626 return True
~\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in _dispatch(self, batch)
586 dispatch_timestamp = time.time()
587 cb = BatchCompletionCallBack(dispatch_timestamp, len(batch), self)
--> 588 job = self._backend.apply_async(batch, callback=cb)
589 self._jobs.append(job)
~\Anaconda3\lib\site-packages\sklearn\externals\joblib\_parallel_backends.py in apply_async(self, func, callback)
109 def apply_async(self, func, callback=None):
110 """Schedule a func to be run"""
--> 111 result = ImmediateResult(func)
112 if callback:
113 callback(result)
~\Anaconda3\lib\site-packages\sklearn\externals\joblib\_parallel_backends.py in __init__(self, batch)
330 # Don't delay the application, to avoid keeping the input
331 # arguments in memory
--> 332 self.results = batch()
334 def get(self):
~\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self)
130 def __call__(self):
--> 131 return [func(*args, **kwargs) for func, args, kwargs in self.items]
133 def __len__(self):
~\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in <listcomp>(.0)
130 def __call__(self):
--> 131 return [func(*args, **kwargs) for func, args, kwargs in self.items]
133 def __len__(self):
~\Anaconda3\lib\site-packages\sklearn\model_selection\_validation.py in _fit_and_score(estimator, X, y, scorer, train, test, verbose, parameters, fit_params, return_train_score, return_parameters, return_n_test_samples, return_times, error_score)
425 start_time = time.time()
--> 427 X_train, y_train = _safe_split(estimator, X, y, train)
428 X_test, y_test = _safe_split(estimator, X, y, test, train)
~\Anaconda3\lib\site-packages\sklearn\utils\metaestimators.py in _safe_split(estimator, X, y, indices, train_indices)
198 X_subset = X[np.ix_(indices, train_indices)]
199 else:
--> 200 X_subset = safe_indexing(X, indices)
202 if y is not None:
~\Anaconda3\lib\site-packages\sklearn\utils\__init__.py in safe_indexing(X, indices)
160 return X.take(indices, axis=0)
161 else:
--> 162 return X[indices]
163 else:
164 return [X[idx] for idx in indices]
~\Anaconda3\lib\site-packages\scipy\sparse\csr.py in __getitem__(self, key)
315 if isintlike(col) or isinstance(col,slice):
316 P = extractor(row, self.shape[0]) # [[1,2],j] or [[1,2],1:2]
--> 317 extracted = P * self
318 if col == slice(None, None, None):
319 return extracted
~\Anaconda3\lib\site-packages\scipy\sparse\base.py in __mul__(self, other)
367 if self.shape[1] != other.shape[0]:
368 raise ValueError('dimension mismatch')
--> 369 return self._mul_sparse_matrix(other)
371 # If it's a list or whatever, treat it like a matrix
~\Anaconda3\lib\site-packages\scipy\sparse\compressed.py in _mul_sparse_matrix(self, other)
539 indptr = np.asarray(indptr, dtype=idx_dtype)
540 indices = np.empty(nnz, dtype=idx_dtype)
--> 541 data = np.empty(nnz, dtype=upcast(self.dtype, other.dtype))
543 fn = getattr(_sparsetools, self.format + '_matmat_pass2')
~\Anaconda3\lib\site-packages\scipy\sparse\sputils.py in upcast(*args)
49 return t
---> 51 raise TypeError('no supported conversion for types: %r' % (args,))
TypeError: no supported conversion for types: (dtype('O'), dtype('O'))
That's because GridSearchCV doesn't support sparse matrices in the fit() method. Please have a look at the signature of the fit method here:
X : array-like, shape = [n_samples, n_features]
As you can see, it's documented that only array-like inputs are supported.
As for why it works normally without grid search, that's because XGBRegressor supports sparse matrices.
The actual error arises during cross-validation, when X is split into train and test sets, which doesn't work for sparse matrices the same way as for normal arrays.
Also, make sure that for XGBRegressor the sparse matrix is of type CSC and not CSR as you have now, because CSR will give you wrong results. It's described here: https://github.com/dmlc/xgboost/issues/1238
