I have a map-stype dataset, which is used for instance segmentation tasks.
The dataset is very imbalanced, in the sense that some images have only 10 objects while others have up to 1200.
How can I limit the number of objects per batch?
A minimal reproducible example is:
import math
import torch
import random
import numpy as np
import pandas as pd
from torch.utils.data import Dataset
from torch.utils.data.sampler import BatchSampler
np.random.seed(0)
random.seed(0)
torch.manual_seed(0)
W = 700
H = 1000
def collate_fn(batch) -> tuple:
return tuple(zip(*batch))
class SyntheticDataset(Dataset):
def __init__(self, image_ids):
self.image_ids = torch.tensor(image_ids, dtype=torch.int64)
self.num_classes = 9
def __len__(self):
return len(self.image_ids)
def __getitem__(self, idx: int):
"""
returns single sample
"""
# print("idx: ", idx)
# deliberately left dangling
# id = self.image_ids[idx].item()
# image_id = self.image_ids[idx]
image_id = torch.as_tensor(idx)
image = torch.randint(0, 255, (H, W))
num_objects = random.randint(10, 1200)
image = torch.randint(0, 255, (3, H, W))
masks = torch.randint(0, 255, (num_objects, H, W))
target = {}
target["image_id"] = image_id
areas = torch.randint(100, 20000, (1, num_objects), dtype=torch.int64)
boxes = torch.randint(100, H * W, (num_objects, 4), dtype=torch.int64)
labels = torch.randint(1, self.num_classes, (1, num_objects), dtype=torch.int64)
iscrowd = torch.zeros(len(labels), dtype=torch.int64)
target["boxes"] = boxes
target["labels"] = labels
target["area"] = areas
target["iscrowd"] = iscrowd
target["masks"] = masks
return image, target, image_id
class BalancedObjectsSampler(BatchSampler):
"""Samples either batch_size images or batches num_objs_per_batch objects.
Args:
data_source (list): contains tuples of (img_id).
batch_size (int): batch size.
num_objs_per_batch (int): number of objects in a batch.
Return
yields the batch_ids/image_ids/image_indices
"""
def __init__(self, data_source, batch_size, num_objs_per_batch, drop_last=False):
self.data_source = data_source
self.sampler = data_source
self.batch_size = batch_size
self.drop_last = drop_last
self.num_objs_per_batch = num_objs_per_batch
self.batch_count = math.ceil(len(self.data_source) / self.batch_size)
def __iter__(self):
obj_count = 0
batch = []
batches = []
counter = 0
for i, (k, s) in enumerate(self.data_source.iteritems()):
if (
obj_count <= obj_count + s
and len(batch) <= self.batch_size - 1
and obj_count + s <= self.num_objs_per_batch
and i < len(self.data_source) - 1
):
# because of https://pytorch.org/docs/stable/data.html#data-loading-order-and-sampler
batch.append(i)
obj_count += s
else:
batches.append(batch)
yield batch
obj_count = 0
batch = []
counter += 1
obj_sums = {}
batch_size = 10
workers = 4
fake_image_ids = np.random.randint(1600000, 1700000, 100)
# assigning any in-range number objects count to each image
for i, k in enumerate(fake_image_ids):
obj_sums[k] = random.randint(10, 1200)
obj_counts = pd.Series(obj_sums)
train_dataset = SyntheticDataset(image_ids=fake_image_ids)
balanced_sampler = BalancedObjectsSampler(
data_source=obj_counts,
batch_size=batch_size,
num_objs_per_batch=1500,
drop_last=False,
)
data_loader_sampler = torch.utils.data.DataLoader(
train_dataset,
num_workers=workers,
collate_fn=collate_fn,
sampler=balanced_sampler,
)
data_loader_iter = torch.utils.data.DataLoader(
train_dataset,
batch_size=batch_size,
shuffle=False,
num_workers=workers,
collate_fn=collate_fn,
)
Iterating over the balanced_sampler
for i, bal_batch in enumerate(balanced_sampler):
print(f"batch_{i}: ", bal_batch)
yields
batch_0: [0]
batch_1: [2, 3]
batch_2: [5]
batch_3: [7]
batch_4: [9, 10]
batch_5: [12, 13, 14, 15]
batch_6: [17, 18]
batch_7: [20, 21, 22]
batch_8: [24, 25]
batch_9: [27]
batch_10: [29]
batch_11: [31]
batch_12: [33]
batch_13: [35, 36, 37]
batch_14: [39, 40]
batch_15: [42, 43]
batch_16: [45, 46]
batch_17: [48, 49, 50]
batch_18: [52, 53, 54]
batch_19: [56]
batch_20: [58, 59]
batch_21: [61, 62]
batch_22: [64]
batch_23: [66]
batch_24: [68]
batch_25: [70, 71]
batch_26: [73]
batch_27: [75, 76, 77]
batch_28: [79, 80]
batch_29: [82, 83, 84, 85, 86, 87]
batch_30: [89]
batch_31: [91]
batch_32: [93, 94]
batch_33: [96]
batch_34: [98]
The above displayed values are the images' indices, but could also be the batch index or even the images' ids.
By running
for i, batch in enumerate(data_loader_sampler):
print("__sample__: ", i, len(batch[0]))
One sees that the batch contains a single sample instead of the expected amount.
__sample__: 0 1
__sample__: 1 1
__sample__: 2 1
__sample__: 3 1
__sample__: 4 1
__sample__: 5 1
__sample__: 6 1
__sample__: 7 1
__sample__: 8 1
__sample__: 9 1
__sample__: 10 1
__sample__: 11 1
__sample__: 12 1
__sample__: 13 1
__sample__: 14 1
__sample__: 15 1
__sample__: 16 1
__sample__: 17 1
__sample__: 18 1
__sample__: 19 1
__sample__: 20 1
__sample__: 21 1
__sample__: 22 1
__sample__: 23 1
__sample__: 24 1
__sample__: 25 1
__sample__: 26 1
__sample__: 27 1
__sample__: 28 1
__sample__: 29 1
__sample__: 30 1
__sample__: 31 1
__sample__: 32 1
__sample__: 33 1
__sample__: 34 1
What I am really trying to prevent is the following behavior that arises from
for i, batch in enumerate(data_loader_iter):
print("__iter__: ", i, sum([k["masks"].shape[0] for k in batch[1]]))
which is
__iter__: 0 2510
__iter__: 1 2060
__iter__: 2 2203
__iter__: 3 2815
ERROR: Unexpected bus error encountered in worker. This might be caused by insufficient shared memory (shm).
Traceback (most recent call last):
File "/usr/lib/python3.8/multiprocessing/queues.py", line 239, in _feed
obj = _ForkingPickler.dumps(obj)
File "/usr/lib/python3.8/multiprocessing/reduction.py", line 51, in dumps
cls(buf, protocol).dump(obj)
File "/blip/venv/lib/python3.8/site-packages/torch/multiprocessing/reductions.py", line 328, in reduce_storage
fd, size = storage._share_fd_()
RuntimeError: falseINTERNAL ASSERT FAILED at "../aten/src/ATen/MapAllocator.cpp":300, please report a bug to PyTorch. unable to write to file </torch_431207_56>
Traceback (most recent call last):
File "/blip/venv/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 990, in _try_get_data
data = self._data_queue.get(timeout=timeout)
File "/usr/lib/python3.8/multiprocessing/queues.py", line 107, in get
if not self._poll(timeout):
File "/usr/lib/python3.8/multiprocessing/connection.py", line 257, in poll
return self._poll(timeout)
File "/usr/lib/python3.8/multiprocessing/connection.py", line 424, in _poll
r = wait([self], timeout)
File "/usr/lib/python3.8/multiprocessing/connection.py", line 931, in wait
ready = selector.select(timeout)
File "/usr/lib/python3.8/selectors.py", line 415, in select
fd_event_list = self._selector.poll(timeout)
File "/blip/venv/lib/python3.8/site-packages/torch/utils/data/_utils/signal_handling.py", line 66, in handler
_error_if_any_worker_fails()
RuntimeError: DataLoader worker (pid 431257) is killed by signal: Bus error. It is possible that dataloader's workers are out of shared memory. Please try to raise your shared memory limit.
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "so.py", line 170, in <module>
for i, batch in enumerate(data_loader_iter):
File "/blip/venv/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 521, in __next__
data = self._next_data()
File "/blip/venv/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1186, in _next_data
idx, data = self._get_data()
File "/blip/venv/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1152, in _get_data
success, data = self._try_get_data()
File "/blip/venv/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1003, in _try_get_data
raise RuntimeError('DataLoader worker (pid(s) {}) exited unexpectedly'.format(pids_str)) from e
RuntimeError: DataLoader worker (pid(s) 431257) exited unexpectedly
which invariably happens when the number of objects per batch is greater than ~2500.
An immediate workaround would be to set the batch_size low, I just need a more optimal solution.
If what you are trying to solve really is:
ERROR: Unexpected bus error encountered in worker. This might be caused by insufficient shared memory (shm).
You could try resizing the allocated shared memory with
# mount -o remount,size=<whatever_is_enough>G /dev/shm
However, as this is not always possible, one fix to your problem would be
class SyntheticDataset(Dataset):
def __init__(self, image_ids):
self.image_ids = torch.tensor(image_ids, dtype=torch.int64)
self.num_classes = 9
def __len__(self):
return len(self.image_ids)
def __getitem__(self, indices):
worker_info = torch.utils.data.get_worker_info()
batch = []
for i in indices:
sample = self.get_sample(i)
batch.append(sample)
gc.collect()
return batch
def get_sample(self, idx: int):
image_id = torch.as_tensor(idx)
image = torch.randint(0, 255, (H, W))
num_objects = idx
image = torch.randint(0, 255, (3, H, W))
masks = torch.randint(0, 255, (num_objects, H, W))
target = {}
target["image_id"] = image_id
areas = torch.randint(100, 20000, (1, num_objects), dtype=torch.int64)
boxes = torch.randint(100, H * W, (num_objects, 4), dtype=torch.int64)
labels = torch.randint(1, self.num_classes, (1, num_objects), dtype=torch.int64)
iscrowd = torch.zeros(len(labels), dtype=torch.int64)
target["boxes"] = boxes
target["labels"] = labels
target["area"] = areas
target["iscrowd"] = iscrowd
target["masks"] = masks
return image, target, image_id
and
class BalancedObjectsSampler(BatchSampler):
"""Samples either batch_size images or batches num_objs_per_batch objects.
Args:
data_source (list): contains tuples of (img_id).
batch_size (int): batch size.
num_objs_per_batch (int): number of objects in a batch.
Return
yields the batch_ids/image_ids/image_indices
"""
def __init__(self, data_source, batch_size, num_objs_per_batch, drop_last=False):
self.data_source = data_source
self.sampler = data_source
self.batch_size = batch_size
self.drop_last = drop_last
self.num_objs_per_batch = num_objs_per_batch
self.batch_count = math.ceil(len(self.data_source) / self.batch_size)
obj_count = 0
batch = []
batches = []
batches_sums = []
for i, (k, s) in enumerate(self.data_source.iteritems()):
if (
len(batch) < self.batch_size
and obj_count + s < self.num_objs_per_batch
and i < len(self.data_source) - 1
):
batch.append(s)
obj_count += s
else:
batches.append(len(batch))
batches_sums.append(obj_count)
obj_count = 0
batch = []
self.batches = batches
self.batch_count = len(batches)
def __iter__(self):
batch = []
img_counts_id = 0
for idx, (k, s) in enumerate(self.data_source.iteritems()):
if len(batch) < self.batches[img_counts_id] and idx < len(self.data_source):
batch.append(s)
elif len(batch) == self.batches[img_counts_id]:
gc.collect()
yield batch
batch = []
if img_counts_id < self.batch_count - 1:
img_counts_id += 1
else:
break
if len(batch) > 0 and not self.drop_last:
yield batch
def __len__(self) -> int:
if self.drop_last:
return len(self.data_source) // self.batch_size
else:
return (len(self.data_source) + self.batch_size - 1) // self.batch_size
As SyntheticDataset's __getitem__ was receiving a list of indices, the simplest solution would just iterate over the indices and retrieve a list of samples. You may just have to collate the output differently in order to feed it to your model.
For the BalancedObjectsSampler, I calculated the size of each batch within the __init__ and used it in __iter__ to assemble the batches.
NOTE: This will still fail if your num_workers > 0 for you are trying to pack at most 1500 objects into a batch - and usually one worker loads one batch at a time. Hence, you have to re-assess your num_objs_per_batch when considering using multiprocessing.
Related
I kept getting this error no matter which model I used, so I was wondering if anybody can give me pointeres what is happening? And how do I solve this issue?
Input data for this model is: http://vision.stanford.edu/aditya86/ImageNetDogs/
Most likely the issue must be stemmed from this part, but wonder which part of my code can fix this issue?:
(0) INVALID_ARGUMENT: Expected dimension in the range [0, 0), but got 0
[[{{node ArgMax}}]]
[[IteratorGetNext]]
[[IteratorGetNext/_1321]]
(1) INVALID_ARGUMENT: Expected dimension in the range [0, 0), but got 0
[[{{node ArgMax}}]]
[[IteratorGetNext]]
I'm running this code on a HPC system with GPU.
I think my data preprocessing should be fine, considering I've QAd it.
Code snippets below
Here is the error generated by the code.
Traceback (most recent call last):
File "/mnt/lustre/indy2lfs/work/mdisspt/mdisspt/y2136744/modelzoo/fc_dog_model/tf/run.py", line 292, in <module>
main()
File "/mnt/lustre/indy2lfs/work/mdisspt/mdisspt/y2136744/modelzoo/fc_dog_model/tf/run.py", line 281, in main
run(
File "/mnt/lustre/indy2lfs/work/mdisspt/mdisspt/y2136744/modelzoo/fc_dog_model/tf/run.py", line 226, in run
est.train(
File "/mnt/lustre/indy2lfs/sw/horovod/0.25.0-gpu/python/3.9.13/lib/python3.9/site-packages/tensorflow_estimator/python/estimator/estimator.py", line 360, in train
loss = self._train_model(input_fn, hooks, saving_listeners)
File "/mnt/lustre/indy2lfs/sw/horovod/0.25.0-gpu/python/3.9.13/lib/python3.9/site-packages/tensorflow_estimator/python/estimator/estimator.py", line 1186, in _train_model
return self._train_model_default(input_fn, hooks, saving_listeners)
File "/mnt/lustre/indy2lfs/sw/horovod/0.25.0-gpu/python/3.9.13/lib/python3.9/site-packages/tensorflow_estimator/python/estimator/estimator.py", line 1217, in _train_model_default
return self._train_with_estimator_spec(estimator_spec, worker_hooks,
File "/mnt/lustre/indy2lfs/sw/horovod/0.25.0-gpu/python/3.9.13/lib/python3.9/site-packages/tensorflow_estimator/python/estimator/estimator.py", line 1533, in _train_with_estimator_spec
_, loss = mon_sess.run([estimator_spec.train_op, estimator_spec.loss])
File "/mnt/lustre/indy2lfs/sw/horovod/0.25.0-gpu/python/3.9.13/lib/python3.9/site-packages/tensorflow/python/training/monitored_session.py", line 782, in run
return self._sess.run(
File "/mnt/lustre/indy2lfs/sw/horovod/0.25.0-gpu/python/3.9.13/lib/python3.9/site-packages/tensorflow/python/training/monitored_session.py", line 1311, in run
return self._sess.run(
File "/mnt/lustre/indy2lfs/sw/horovod/0.25.0-gpu/python/3.9.13/lib/python3.9/site-packages/tensorflow/python/training/monitored_session.py", line 1416, in run
raise six.reraise(*original_exc_info)
File "/mnt/lustre/indy2lfs/sw/miniconda3/4.12.0-py39-gpu/lib/python3.9/site-packages/six.py", line 719, in reraise
raise value
File "/mnt/lustre/indy2lfs/sw/horovod/0.25.0-gpu/python/3.9.13/lib/python3.9/site-packages/tensorflow/python/training/monitored_session.py", line 1401, in run
return self._sess.run(*args, **kwargs)
File "/mnt/lustre/indy2lfs/sw/horovod/0.25.0-gpu/python/3.9.13/lib/python3.9/site-packages/tensorflow/python/training/monitored_session.py", line 1469, in run
outputs = _WrappedSession.run(
File "/mnt/lustre/indy2lfs/sw/horovod/0.25.0-gpu/python/3.9.13/lib/python3.9/site-packages/tensorflow/python/training/monitored_session.py", line 1232, in run
return self._sess.run(*args, **kwargs)
File "/mnt/lustre/indy2lfs/sw/horovod/0.25.0-gpu/python/3.9.13/lib/python3.9/site-packages/tensorflow/python/client/session.py", line 967, in run
result = self._run(None, fetches, feed_dict, options_ptr,
File "/mnt/lustre/indy2lfs/sw/horovod/0.25.0-gpu/python/3.9.13/lib/python3.9/site-packages/tensorflow/python/client/session.py", line 1190, in _run
results = self._do_run(handle, final_targets, final_fetches,
File "/mnt/lustre/indy2lfs/sw/horovod/0.25.0-gpu/python/3.9.13/lib/python3.9/site-packages/tensorflow/python/client/session.py", line 1370, in _do_run
return self._do_call(_run_fn, feeds, fetches, targets, options,
File "/mnt/lustre/indy2lfs/sw/horovod/0.25.0-gpu/python/3.9.13/lib/python3.9/site-packages/tensorflow/python/client/session.py", line 1396, in _do_call
raise type(e)(node_def, op, message) # pylint: disable=no-value-for-parameter
tensorflow.python.framework.errors_impl.InvalidArgumentError: Graph execution error:
2 root error(s) found.
(0) INVALID_ARGUMENT: Expected dimension in the range [0, 0), but got 0
[[{{node ArgMax}}]]
[[IteratorGetNext]]
[[IteratorGetNext/_1321]]
(1) INVALID_ARGUMENT: Expected dimension in the range [0, 0), but got 0
[[{{node ArgMax}}]]
[[IteratorGetNext]]
0 successful operations.
0 derived errors ignored.
srun: error: r2i4n1: task 0: Exited with exit code 1
srun: launch/slurm: _step_signal: Terminating StepId=4084847.0
Run.py
def run(
args, params, model_fn, train_input_fn=None, eval_input_fn=None,
):
dtype = tf.keras.mixed_precision.Policy(
'mixed_float16', # Important: This is required.
)
tf.keras.mixed_precision.set_global_policy(dtype)
# update and validate runtime params
runconfig_params = params["runconfig"]
update_params_from_args(args, runconfig_params)
validate_params(params)
# save params for reproducibility
save_params(params, model_dir=runconfig_params["model_dir"])
# get runtime configurations
use_cs = is_cs(runconfig_params)
csrunconfig_dict = get_csrunconfig_dict(runconfig_params)
stack_params = get_custom_stack_params(params)
# prep cs1 run environment, run config and estimator
check_env(runconfig_params)
est_config = CSRunConfig(
cs_ip=runconfig_params["cs_ip"],
stack_params=stack_params,
**csrunconfig_dict,
)
model= model_fn()
est = tf.keras.estimator.model_to_estimator(
keras_model=model,
model_dir=runconfig_params["model_dir"],
# config=est_config,
# params=params,
)
# execute based on mode
elif runconfig_params["mode"] == "train":
# est.compile(input_fn=train_input_fn)
est.train(
input_fn=train_input_fn,
steps=runconfig_params["steps"],
max_steps=runconfig_params["max_steps"],
# use_cs=use_cs,
)
def main():
"""
Main function
"""
dtype = Policy('mixed_float16')
tf.keras.mixed_precision.set_global_policy(dtype)
tf.keras.backend.set_floatx('float16')
default_model_dir = os.path.join(
os.path.dirname(os.path.abspath(__file__)), "model_dir"
)
parser = create_arg_parser(default_model_dir)
args = parser.parse_args(sys.argv[1:])
params = get_params(args.params)
print(params)
summary_context = (
cs_disable_summaries if args.multireplica else cs_enable_summaries
)
with summary_context():
run(
args=args,
params=params,
model_fn=model_fn,
train_input_fn=train_input_fn,
# eval_input_fn=eval_input_fn,
)
if __name__ == "__main__":
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.INFO)
main()
Model.py
def model_fn():
dtype = Policy('mixed_float16')
tf.keras.mixed_precision.set_global_policy(dtype)
# tf.keras.backend.set_floatx('float16')
inputs = tf.keras.Input(shape=(331,331,3))
# Entry block
x = layers.Conv2D(128, 3, strides=2, padding="same")(inputs)
x = layers.BatchNormalization()(x)
x = layers.Activation("relu")(x)
previous_block_activation = x # Set aside residual
for size in [256, 512, 728]:
x = layers.Activation("relu")(x)
x = layers.SeparableConv2D(size, 3, padding="same")(x)
x = layers.BatchNormalization()(x)
x = layers.Activation("relu")(x)
x = layers.SeparableConv2D(size, 3, padding="same")(x)
x = layers.BatchNormalization()(x)
x = layers.MaxPooling2D(3, strides=2, padding="same")(x)
# Project residual
residual = layers.Conv2D(size, 1, strides=2, padding="same")(
previous_block_activation
)
x = layers.add([x, residual]) # Add back residual
previous_block_activation = x # Set aside next residual
x = layers.SeparableConv2D(1024, 3, padding="same")(x)
x = layers.BatchNormalization()(x)
x = layers.Activation("relu")(x)
x = layers.GlobalAveragePooling2D()(x)
activation = "softmax"
units = 1
x = layers.Dropout(0.5)(x)
outputs = layers.Dense(1, activation=activation)(x)
estimator_model = tf.keras.Model(inputs, outputs)
estimator_model.compile(
optimizer=tf.keras.optimizers.Adam(),
loss="categorical_crossentropy",
# metrics=['accuracy']
)
estimator_model.summary()
return estimator_model
data.py
def input_fn(params, mode=tf.estimator.ModeKeys.TRAIN):
"""
:param <dict> params: dict containing input parameters for creating dataset.
Expects the following fields:
- "data_dir" (string): path to the data files to use.
- "batch_size" (int): batch size
- "to_float16" (bool): whether to convert to float16 or not
- "drop_last_batch" (bool): whether to drop the last batch or not
"""
params = {
'train_input': {
'shuffle': True,
'data_dir': 'dog_breed_dataset', # Place to store data
'batch_size': 32,
'num_parallel_calls': 0 # 0 means AUTOTUNE
}
}
training = mode == tf.estimator.ModeKeys.TRAIN
evaluating = mode == tf.estimator.ModeKeys.EVAL
ds = None
input_params = params["train_input"]
data_dir = input_params["data_dir"]
# setting num_parallel_calls to 0 implies AUTOTUNE
num_parallel_calls = input_params.get("num_parallel_calls", 0)
batch_size = (
input_params.get("train_batch_size")
if training
else input_params.get("eval_batch_size")
)
if batch_size is None:
batch_size = input_params["batch_size"]
list_ds = tf.data.Dataset.list_files(str(data_dir+'/*/*'), shuffle=False)
class_names = np.array(sorted([item.split('/')[-1] for item in glob.glob(data_dir + '/*')]))
val_size = int(image_count * 0.2)
def get_label(file_path):
# Convert the path to a list of path components
parts = tf.strings.split(file_path, os.path.sep)
one_hot = parts[-2] == class_names
one_hot=tf.cast(one_hot, tf.int32)
return tf.argmax(one_hot)
# return one_hot
def decode_img(img):
# Convert the compressed string to a 3D uint8 tensor
img = tf.io.decode_jpeg(img, channels=3)
img = tf.cast(img, tf.float16)
img = (img/225)
# img = tf.keras.applications.mobilenet.preprocess_input(img)
# Resize the image to the desired size
return tf.image.resize(img, [image_param['img_height'], image_param["img_width"]])
def process_path(file_path):
label = get_label(file_path)
# Load the raw data from the file as a string
img = tf.io.read_file(file_path)
img = decode_img(img)
return img, label
if training and input_params["shuffle"]:
list_ds = list_ds.shuffle(image_count, reshuffle_each_iteration=False)
if training:
ds = list_ds.skip(val_size)
ds = ds.repeat()
else:
ds = list_ds.take(val_size)
ds = ds.map(
process_path,
num_parallel_calls=num_parallel_calls
if num_parallel_calls > 0
else tf.data.experimental.AUTOTUNE,
)
return ds
def train_input_fn(params=None):
return input_fn(params, mode=tf.estimator.ModeKeys.TRAIN)
When trying to get batch in dataloader I get a key error from torch_geometric\data\storage.py. Please see below:
import torch
from torch_geometric.data import Data, InMemoryDataset
import matplotlib.pyplot as plt
import networkx as nx
# Define the edge index and features for the first graph
x1 = torch.tensor([[1, 2], [3, 4]], dtype=torch.float)
edge_index1 = torch.tensor([[0, 1, 1, 0], [1, 0, 0, 1]], dtype=torch.long)
# Define the edge index and features for the second graph
x2 = torch.tensor([[2, 3], [1, 4]], dtype=torch.float)
edge_index2 = torch.tensor([[0, 1, 1, 0], [1, 0, 0, 1]], dtype=torch.long)
# Create a list of PyTorch Geometric Data objects to represent the graphs
data_list = [Data(x=x1, edge_index=edge_index1),
Data(x=x2, edge_index=edge_index2)]
# Define the custom dataset class
class CustomDataset(InMemoryDataset):
def __init__(self, root, transform=None, pre_transform=None):
super(CustomDataset, self).__init__(root, transform, pre_transform)
self.data, self.slices = self.collate(data_list)
#property
def raw_file_names(self):
return []
#property
def processed_file_names(self):
return []
def download(self):
pass
def process(self):
pass
def __len__(self):
return len(self.data)
def get(self, idx):
# Return the Data object at the specified index
return self.data[idx]
# Initialize the custom dataset
dataset = CustomDataset(root='./data')
# Apply transformations to the data if desired
if dataset.transform is not None:
dataset.transform = T.Compose([T.RandomRotate(30, resample=False),
T.RandomTranslate(0.1)])
# Put the dataset in a data loader
dataloader = torch.utils.data.DataLoader(dataset, batch_size=2, shuffle=True)
# Loop over the dataloader
for batch in dataloader:
# Access the data for each graph in the batch
# x, edge_index, y = batch.x, batch.edge_index, batch.y
x, edge_index = batch.x, batch.edge_index
# Do something with the data
print(x, edge_index, y)
Error:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
Cell In[4], line 2
1 # Loop over the dataloader
----> 2 for batch in dataloader:
3 # Access the data for each graph in the batch
4 # x, edge_index, y = batch.x, batch.edge_index, batch.y
5 x, edge_index = batch.x, batch.edge_index
7 # Do something with the data
File ~\Anaconda3\envs\pyg\lib\site-packages\torch\utils\data\dataloader.py:681, in _BaseDataLoaderIter.__next__(self)
678 if self._sampler_iter is None:
679 # TODO(https://github.com/pytorch/pytorch/issues/76750)
680 self._reset() # type: ignore[call-arg]
--> 681 data = self._next_data()
682 self._num_yielded += 1
683 if self._dataset_kind == _DatasetKind.Iterable and \
684 self._IterableDataset_len_called is not None and \
685 self._num_yielded > self._IterableDataset_len_called:
File ~\Anaconda3\envs\pyg\lib\site-packages\torch\utils\data\dataloader.py:721, in _SingleProcessDataLoaderIter._next_data(self)
719 def _next_data(self):
720 index = self._next_index() # may raise StopIteration
--> 721 data = self._dataset_fetcher.fetch(index) # may raise StopIteration
722 if self._pin_memory:
723 data = _utils.pin_memory.pin_memory(data, self._pin_memory_device)
File ~\Anaconda3\envs\pyg\lib\site-packages\torch\utils\data\_utils\fetch.py:49, in _MapDatasetFetcher.fetch(self, possibly_batched_index)
47 def fetch(self, possibly_batched_index):
48 if self.auto_collation:
---> 49 data = [self.dataset[idx] for idx in possibly_batched_index]
50 else:
51 data = self.dataset[possibly_batched_index]
File ~\Anaconda3\envs\pyg\lib\site-packages\torch\utils\data\_utils\fetch.py:49, in <listcomp>(.0)
47 def fetch(self, possibly_batched_index):
48 if self.auto_collation:
---> 49 data = [self.dataset[idx] for idx in possibly_batched_index]
50 else:
51 data = self.dataset[possibly_batched_index]
File ~\Anaconda3\envs\pyg\lib\site-packages\torch_geometric\data\dataset.py:197, in Dataset.__getitem__(self, idx)
187 r"""In case :obj:`idx` is of type integer, will return the data object
188 at index :obj:`idx` (and transforms it in case :obj:`transform` is
189 present).
190 In case :obj:`idx` is a slicing object, *e.g.*, :obj:`[2:5]`, a list, a
191 tuple, or a :obj:`torch.Tensor` or :obj:`np.ndarray` of type long or
192 bool, will return a subset of the dataset at the specified indices."""
193 if (isinstance(idx, (int, np.integer))
194 or (isinstance(idx, Tensor) and idx.dim() == 0)
195 or (isinstance(idx, np.ndarray) and np.isscalar(idx))):
--> 197 data = self.get(self.indices()[idx])
198 data = data if self.transform is None else self.transform(data)
199 return data
Cell In[3], line 43, in CustomDataset.get(self, idx)
41 def get(self, idx):
42 # Return the Data object at the specified index
---> 43 return self.data[idx]
File ~\Anaconda3\envs\pyg\lib\site-packages\torch_geometric\data\data.py:444, in Data.__getitem__(self, key)
443 def __getitem__(self, key: str) -> Any:
--> 444 return self._store[key]
File ~\Anaconda3\envs\pyg\lib\site-packages\torch_geometric\data\storage.py:81, in BaseStorage.__getitem__(self, key)
80 def __getitem__(self, key: str) -> Any:
---> 81 return self._mapping[key]
KeyError: 0
I am a beginner with pytorch. I am trying to do an aspect based sentiment analysis. I am facing the error mentioned in the subject. My code is as follows: I request help to resolve this error. Thanks in advance. I will share the entire code and the error stack.
!pip install transformers
import transformers
from transformers import BertModel, BertTokenizer, AdamW, get_linear_schedule_with_warmup
import torch
import numpy as np
import pandas as pd
import seaborn as sns
from pylab import rcParams
import matplotlib.pyplot as plt
from matplotlib import rc
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
from collections import defaultdict
from textwrap import wrap
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader
%matplotlib inline
%config InlineBackend.figure_format='retina'
sns.set(style='whitegrid', palette='muted', font_scale=1.2)
HAPPY_COLORS_PALETTE = ["#01BEFE", "#FFDD00", "#FF7D00", "#FF006D", "#ADFF02", "#8F00FF"]
sns.set_palette(sns.color_palette(HAPPY_COLORS_PALETTE))
rcParams['figure.figsize'] = 12, 8
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)
torch.manual_seed(RANDOM_SEED)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
df = pd.read_csv("/Users/user1/Downloads/auto_bio_copy.csv")
I am importing a csv file which has content and label as shown below:
df.head()
content label
0 I told him I would leave the car and come back... O O O O O O O O O O O O O O O O O O O O O O O ...
1 I had the ignition interlock device installed ... O O O B-Negative I-Negative I-Negative O O O O...
2 Aug. 23 or 24 I went to Walmart auto service d... O O O O O O O B-Negative I-Negative I-Negative...
3 Side note This is the same reaction I 'd gotte... O O O O O O O O O O O O O O O O O O O O O O O ...
4 Locked out of my car . Called for help 215pm w... O O O O O O O O O O O O O O O O O B-Negative O...
df.shape
(1999, 2)
I am converting the label values into integers as follows:
O=zero(0), B-Positive=1, I-Positive=2, B-Negative=3, I-Negative=4, B-Neutral=5, I-Neutral=6, B-Mixed=7, I-Mixed=8
df['label'] = df.label.str.replace('O', '0')
df['label'] = df.label.str.replace('B-Positive', '1')
df['label'] = df.label.str.replace('I-Positive', '2')
df['label'] = df.label.str.replace('B-Negative', '3')
df['label'] = df.label.str.replace('I-Negative', '4')
df['label'] = df.label.str.replace('B-Neutral', '5')
df['label'] = df.label.str.replace('I-Neutral', '6')
df['label'] = df.label.str.replace('B-Mixed', '7')
df['label'] = df.label.str.replace('I-Mixed', '8')
Next, converting the string to integer list as follows:
df['label'] = df['label'].str.split(' ').apply(lambda s: list(map(int, s)))
df.head()
content label
0 I told him I would leave the car and come back... [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
1 I had the ignition interlock device installed ... [0, 0, 0, 3, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
2 Aug. 23 or 24 I went to Walmart auto service d... [0, 0, 0, 0, 0, 0, 0, 3, 4, 4, 4, 0, 0, 0, 0, ...
3 Side note This is the same reaction I 'd gotte... [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
4 Locked out of my car . Called for help 215pm w... [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
PRE_TRAINED_MODEL_NAME = 'bert-base-cased'
tokenizer = BertTokenizer.from_pretrained(PRE_TRAINED_MODEL_NAME)
token_lens = []
for txt in df.content:
tokens = tokenizer.encode_plus(txt, max_length=512, add_special_tokens=True, truncation=True, return_attention_mask=True)
token_lens.append(len(tokens))
MAX_LEN = 512
class Auto_Bio_Dataset(Dataset):
def __init__(self, contents, labels, tokenizer, max_len):
self.contents = contents
self.labels = labels
self.tokenizer = tokenizer
self.max_len = max_len
def __len__(self):
return len(self.contents)
def __getitem__(self, item):
content = str(self.contents[item])
label = self.labels[item]
encoding = self.tokenizer.encode_plus(
content,
add_special_tokens=True,
max_length=self.max_len,
return_token_type_ids=False,
#padding='max_length',
pad_to_max_length=True,
truncation=True,
return_attention_mask=True,
return_tensors='pt'
)
return {
'content_text': content,
'input_ids': encoding['input_ids'].flatten(),
'attention_mask': encoding['attention_mask'].flatten(),
'labels': torch.tensor(label)
}
df_train, df_test = train_test_split(
df,
test_size=0.1,
random_state=RANDOM_SEED
)
df_val, df_test = train_test_split(
df_test,
test_size=0.5,
random_state=RANDOM_SEED
)
df_train.shape, df_val.shape, df_test.shape
((1799, 2), (100, 2), (100, 2))
def create_data_loader(df, tokenizer, max_len, batch_size):
ds = Auto_Bio_Dataset(
contents=df.content.to_numpy(),
labels=df.label.to_numpy(),
tokenizer=tokenizer,
max_len=max_len
)
return DataLoader(
ds,
batch_size=batch_size,
num_workers=2
)
BATCH_SIZE = 16
train_data_loader = create_data_loader(df_train, tokenizer, MAX_LEN, BATCH_SIZE)
val_data_loader = create_data_loader(df_val, tokenizer, MAX_LEN, BATCH_SIZE)
test_data_loader = create_data_loader(df_test, tokenizer, MAX_LEN, BATCH_SIZE)
data = next(iter(train_data_loader))
data.keys()
Error is as follows:
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-71-e0a71018e473> in <module>
----> 1 data = next(iter(train_data_loader))
2 data.keys()
~/opt/anaconda3/lib/python3.7/site-packages/torch/utils/data/dataloader.py in __next__(self)
528 if self._sampler_iter is None:
529 self._reset()
--> 530 data = self._next_data()
531 self._num_yielded += 1
532 if self._dataset_kind == _DatasetKind.Iterable and \
~/opt/anaconda3/lib/python3.7/site-packages/torch/utils/data/dataloader.py in _next_data(self)
1222 else:
1223 del self._task_info[idx]
-> 1224 return self._process_data(data)
1225
1226 def _try_put_index(self):
~/opt/anaconda3/lib/python3.7/site-packages/torch/utils/data/dataloader.py in _process_data(self, data)
1248 self._try_put_index()
1249 if isinstance(data, ExceptionWrapper):
-> 1250 data.reraise()
1251 return data
1252
~/opt/anaconda3/lib/python3.7/site-packages/torch/_utils.py in reraise(self)
455 # instantiate since we don't know how to
456 raise RuntimeError(msg) from None
--> 457 raise exception
458
459
RuntimeError: Caught RuntimeError in DataLoader worker process 0.
Original Traceback (most recent call last):
File "/Users/namrathabhandarkar/opt/anaconda3/lib/python3.7/site-packages/torch/utils/data/_utils/worker.py", line 287, in _worker_loop
data = fetcher.fetch(index)
File "/Users/namrathabhandarkar/opt/anaconda3/lib/python3.7/site-packages/torch/utils/data/_utils/fetch.py", line 52, in fetch
return self.collate_fn(data)
File "/Users/namrathabhandarkar/opt/anaconda3/lib/python3.7/site-packages/torch/utils/data/_utils/collate.py", line 157, in default_collate
return elem_type({key: default_collate([d[key] for d in batch]) for key in elem})
File "/Users/namrathabhandarkar/opt/anaconda3/lib/python3.7/site-packages/torch/utils/data/_utils/collate.py", line 157, in <dictcomp>
return elem_type({key: default_collate([d[key] for d in batch]) for key in elem})
File "/Users/namrathabhandarkar/opt/anaconda3/lib/python3.7/site-packages/torch/utils/data/_utils/collate.py", line 138, in default_collate
return torch.stack(batch, 0, out=out)
RuntimeError: stack expects each tensor to be equal size, but got [157] at entry 0 and [154] at entry 1
I found in some github post that this error can be because of batch size, so i changed the batch size to 8 and then the error is as follows:
BATCH_SIZE = 8
train_data_loader = create_data_loader(df_train, tokenizer, MAX_LEN, BATCH_SIZE)
val_data_loader = create_data_loader(df_val, tokenizer, MAX_LEN, BATCH_SIZE)
test_data_loader = create_data_loader(df_test, tokenizer, MAX_LEN, BATCH_SIZE)
data = next(iter(train_data_loader))
data.keys()
RuntimeError Traceback (most recent call last)
<ipython-input-73-e0a71018e473> in <module>
----> 1 data = next(iter(train_data_loader))
2 data.keys()
~/opt/anaconda3/lib/python3.7/site-packages/torch/utils/data/dataloader.py in __next__(self)
528 if self._sampler_iter is None:
529 self._reset()
--> 530 data = self._next_data()
531 self._num_yielded += 1
532 if self._dataset_kind == _DatasetKind.Iterable and \
~/opt/anaconda3/lib/python3.7/site-packages/torch/utils/data/dataloader.py in _next_data(self)
1222 else:
1223 del self._task_info[idx]
-> 1224 return self._process_data(data)
1225
1226 def _try_put_index(self):
~/opt/anaconda3/lib/python3.7/site-packages/torch/utils/data/dataloader.py in _process_data(self, data)
1248 self._try_put_index()
1249 if isinstance(data, ExceptionWrapper):
-> 1250 data.reraise()
1251 return data
1252
~/opt/anaconda3/lib/python3.7/site-packages/torch/_utils.py in reraise(self)
455 # instantiate since we don't know how to
456 raise RuntimeError(msg) from None
--> 457 raise exception
458
459
RuntimeError: Caught RuntimeError in DataLoader worker process 0.
Original Traceback (most recent call last):
File "/Users/namrathabhandarkar/opt/anaconda3/lib/python3.7/site-packages/torch/utils/data/_utils/worker.py", line 287, in _worker_loop
data = fetcher.fetch(index)
File "/Users/namrathabhandarkar/opt/anaconda3/lib/python3.7/site-packages/torch/utils/data/_utils/fetch.py", line 52, in fetch
return self.collate_fn(data)
File "/Users/namrathabhandarkar/opt/anaconda3/lib/python3.7/site-packages/torch/utils/data/_utils/collate.py", line 157, in default_collate
return elem_type({key: default_collate([d[key] for d in batch]) for key in elem})
File "/Users/namrathabhandarkar/opt/anaconda3/lib/python3.7/site-packages/torch/utils/data/_utils/collate.py", line 157, in <dictcomp>
return elem_type({key: default_collate([d[key] for d in batch]) for key in elem})
File "/Users/namrathabhandarkar/opt/anaconda3/lib/python3.7/site-packages/torch/utils/data/_utils/collate.py", line 137, in default_collate
out = elem.new(storage).resize_(len(batch), *list(elem.size()))
RuntimeError: Trying to resize storage that is not resizable
I am not sure what is causing the first error(the one mentioned in subject). I am using padding and truncate in my code, yet the error.
Any help to resolve this issue is highly appreciated.
Thanks in advance.
Quick answer: you need to implement your own collate_fn function when creating a DataLoader. See the discussion from PyTorch forum.
You should be able to pass the function object to DataLoader instantiation:
def my_collate_fn(data):
# TODO: Implement your function
# But I guess in your case it should be:
return tuple(data)
return DataLoader(
ds,
batch_size=batch_size,
num_workers=2,
collate_fn=my_collate_fn
)
This should be the way to solving this, but as a temporary remedy in case anything is urgent or a quick test is nice, simply change batch_size to 1 to prevent torch from trying to stack things with different shapes up.
I have a yolo_non_max_suppression method as shown below:
def yolo_non_max_suppression(scores, boxes, classes, max_boxes = 10, iou_threshold = 0.5):
max_boxes_tensor = K.variable(value=max_boxes, dtype='int32')
print("printing max_boxes_tensor:",max_boxes_tensor)
print("Data type:",K.dtype(max_boxes_tensor))
init= tf.compat.v1.variables_initializer([max_boxes_tensor])
print("printing tf.compat.v1.variables_initializer:",init)
K.get_session().run(init))
nms_indices = tf.image.non_max_suppression(boxes, scores, max_boxes_tensor,
iou_threshold=iou_threshold)
scores = K.gather(scores, nms_indices)
boxes = K.gather(boxes, nms_indices)
classes = K.gather(classes, nms_indices)
return scores, boxes, classes
When I test the above code out using the following test stub:
with tf.compat.v1.Session() as test_b:
scores = tf.random.normal([54,], mean=1, stddev=4, seed = 1)
boxes = tf.random.normal([54, 4], mean=1, stddev=4, seed = 1)
classes = tf.random.normal([54,], mean=1, stddev=4, seed = 1)
scores, boxes, classes = yolo_non_max_suppression(scores, boxes, classes)
print("scores[2] = " + str(scores[2].eval()))
print("boxes[2] = " + str(boxes[2].eval()))
print("classes[2] = " + str(classes[2].eval()))
print("scores.shape = " + str(scores.eval().shape))
print("boxes.shape = " + str(boxes.eval().shape))
print("classes.shape = " + str(classes.eval().shape))
The get the following output:
max_boxes= 10
printing max_boxes_tensor: <tf.Variable 'Variable:0' shape=() dtype=int32>
Data type: int32
printing tf.compat.v1.variables_initializer: name: "init"
op: "NoOp"
input: "^Variable/Assign"
scores[2] = 6.938395
boxes[2] = [-5.299932 3.1379814 4.450367 0.95942086]
classes[2] = -2.2452729
scores.shape = (10,)
boxes.shape = (10, 4)
classes.shape = (10,)
I now try to invoke the above method from yolo_eval as shown below:-
def yolo_eval(yolo_outputs, image_shape = (720., 1280.), max_boxes=10, score_threshold=.6, iou_threshold=.5):
box_confidence, box_xy, box_wh, box_class_probs = yolo_outputs
boxes = yolo_boxes_to_corners(box_xy, box_wh)
scores, boxes, classes = yolo_filter_boxes(box_confidence, boxes, box_class_probs, threshold = score_threshold)
boxes = scale_boxes(boxes, image_shape)
# here i invoke the above implemented yolo_non_max_suppression
#scores, boxes, classes = yolo_non_max_suppression(scores, boxes, classes, max_boxes = max_boxes, iou_threshold = iou_threshold)
scores, boxes, classes = yolo_non_max_suppression(scores, boxes, classes)
return scores, boxes, classes
The tested the above yolo_eval method using my test stub:
with tf.compat.v1.Session() as test_b:
yolo_outputs = (tf.random.normal([19, 19, 5, 1], mean=1, stddev=4, seed = 1),
tf.random.normal([19, 19, 5, 2], mean=1, stddev=4, seed = 1),
tf.random.normal([19, 19, 5, 2], mean=1, stddev=4, seed = 1),
tf.random.normal([19, 19, 5, 80], mean=1, stddev=4, seed = 1))
scores, boxes, classes = yolo_eval(yolo_outputs)
print("scores[2] = " + str(scores[2].eval()))
print("boxes[2] = " + str(boxes[2].eval()))
print("classes[2] = " + str(classes[2].eval()))
print("scores.shape = " + str(scores.eval().shape))
print("boxes.shape = " + str(boxes.eval().shape))
print("classes.shape = " + str(classes.eval().shape))
I get the following output:
max_boxes= 10
printing max_boxes_tensor: <tf.Variable 'Variable_1:0' shape=() dtype=int32>
Data type: int32
printing tf.compat.v1.variables_initializer: name: "init_3"
op: "NoOp"
input: "^Variable_1/Assign"
scores[2] = 138.79124
boxes[2] = [1292.3297 -278.52167 3876.9893 -835.56494]
classes[2] = 54
scores.shape = (10,)
boxes.shape = (10, 4)
classes.shape = (10,)
So far, so good. However when I test the pretrained model on images ( exactly as per Andrew's coursera assignment with absolute zero deviation, except for upgrading the code to tensorflow 2.3) , things start to behave differently.
sess = K.get_session()
class_names = read_classes("model_data/coco_classes.txt")
anchors = read_anchors("model_data/yolo_anchors.txt")
image_shape = (720., 1280.) yolo_model = load_model(model_dir)
yolo_model.summary() # no issues here..
yolo_outputs = yolo_head(yolo_model.output, anchors, len(class_names)) # another library method available as part of the assignment
print(yolo_outputs)
Giving the following output:
(<tf.Tensor 'Sigmoid:0' shape=(None, None, None, 5, 1) dtype=float32>, <tf.Tensor 'RealDiv:0' shape=(None, None, None, 5, 2) dtype=float32>, <tf.Tensor 'RealDiv_1:0' shape=(None, None, None, 5, 2) dtype=float32>, <tf.Tensor 'Softmax:0' shape=(None, None, None, 5, 80) dtype=float32>)
Now I call my previously implemented yolo_eval as shown below:
scores, boxes, classes = yolo_eval(yolo_outputs, image_shape)
This where I get the following error:
max_boxes= 10
printing max_boxes_tensor: <tf.Variable 'Variable:0' shape=() dtype=int32, numpy=10>
Data type: int32
printing tf.compat.v1.variables_initializer: None
TypeError Traceback (most recent call last)
<ipython-input-15-1c93a2e863cf> in <module>
----> 1 scores, boxes, classes = yolo_eval(yolo_outputs, image_shape)
<ipython-input-9-86120a47e84f> in yolo_eval(yolo_outputs, image_shape, max_boxes, score_threshold, iou_threshold)
37 # Use one of the functions you've implemented to perform Non-max suppression with a threshold of iou_threshold (≈1 line)
38 #scores, boxes, classes = yolo_non_max_suppression(scores, boxes, classes, max_boxes = max_boxes, iou_threshold = iou_threshold)
---> 39 scores, boxes, classes = yolo_non_max_suppression(scores, boxes, classes)
40
41 ### END CODE HERE ###
<ipython-input-7-f4ff9df30d44> in yolo_non_max_suppression(scores, boxes, classes, max_boxes, iou_threshold)
25 print("Data type:",K.dtype(max_boxes_tensor))
26 print("printing tf.compat.v1.variables_initializer:",tf.compat.v1.variables_initializer([max_boxes_tensor]))
---> 27 K.get_session().run(tf.compat.v1.variables_initializer([max_boxes_tensor]))
28 #K.get_session().run(tf.global_variables_initializer())
29
~/miniconda3/lib/python3.8/site-packages/tensorflow/python/client/session.py in run(self, fetches, feed_dict, options, run_metadata)
955
956 try:
--> 957 result = self._run(None, fetches, feed_dict, options_ptr,
958 run_metadata_ptr)
959 if run_metadata:
~/miniconda3/lib/python3.8/site-packages/tensorflow/python/client/session.py in _run(self, handle, fetches, feed_dict, options, run_metadata)
1163
1164 # Create a fetch handler to take care of the structure of fetches.
-> 1165 fetch_handler = _FetchHandler(
1166 self._graph, fetches, feed_dict_tensor, feed_handles=feed_handles)
1167
~/miniconda3/lib/python3.8/site-packages/tensorflow/python/client/session.py in __init__(self, graph, fetches, feeds, feed_handles)
475 """
476 with graph.as_default():
--> 477 self._fetch_mapper = _FetchMapper.for_fetch(fetches)
478 self._fetches = []
479 self._targets = []
~/miniconda3/lib/python3.8/site-packages/tensorflow/python/client/session.py in for_fetch(fetch)
260 """
261 if fetch is None:
--> 262 raise TypeError('Fetch argument %r has invalid type %r' %
263 (fetch, type(fetch)))
264 elif isinstance(fetch, (list, tuple)):
TypeError: Fetch argument None has invalid type <class 'NoneType'>
What stands out is that the tf.compat.v1.variables_initializer return None this time around. Not sure why. I am going nuts trying to get some clue.
I managed to solve it as follows:
with tf.compat.v1.Session() as sess:
class_names = read_classes("model_data/coco_classes.txt")
anchors = read_anchors("model_data/yolo_anchors.txt")
image_shape = (720., 1280.)
model_dir=os.path.join("./model_data/","yolo.h5")
print(model_dir)
print(os.path.isfile(model_dir))
h5 = h5py.File(model_dir,'r')
yolo_model = load_model(model_dir)
yolo_outputs = yolo_head(yolo_model.output, anchors, len(class_names))
print(yolo_outputs)
scores, boxes, classes = yolo_eval(yolo_outputs, image_shape)
out_scores, out_boxes, out_classes = predict(sess,yolo_model,"cars3.jpg")
Use with statement instead of sess = K.get_session().
with tf.compat.v1.Session() as sess:
class_names = read_classes("model_data/coco_classes.txt")
anchors = read_anchors("model_data/yolo_anchors.txt")
image_shape = (720., 1280.)
yolo_model = load_model("model_data/yolo.h5")
yolo_outputs = yolo_head(yolo_model.output, anchors, len(class_names))
print(yolo_outputs)
scores, boxes, classes = yolo_eval(yolo_outputs, image_shape)
out_scores, out_boxes, out_classes = predict(sess,"test.jpg")
I have a synthetic dataset consisting of features (X) and labels (y) which is used for KMeans clustering using Python 3.8 and sklearn 0.22.2 and numpy 1.19.
X.shape, y.shape
# ((100, 2), (100,))
kmeans = KMeans(n_clusters = 3, init = 'random', n_init = 10, max_iter = 300)
# Train model on scaled features-
kmeans.fit(X)
After training KMeans on 'X', I want to replace the unique (continuous) values of 'X' with the cluster centers (discreet) obtained using KMeans.
for i in range(3):
print("cluster number {0} has center = {1}".format(i + 1, kmeans.cluster_centers_[i, :]))
'''
cluster number 1 has center = [-0.7869159 1.14173859]
cluster number 2 has center = [ 1.28010442 -1.04663318]
cluster number 3 has center = [-0.54654735 0.0054752 ]
'''
set(kmeans.labels_)
# {0, 1, 2}
One way I have of doing it is:
X[np.where(clustered_labels == 0)] = val[0,:]
X[np.where(clustered_labels == 1)] = val[1,:]
X[np.where(clustered_labels == 2)] = val[2,:]
Can I do it using np.select()?
cond = [clustered_labels == i for i in range(3)]
val = kmeans.cluster_centers_[:,:]
But on executing the code:
np.select(cond, val)
I get the following error:
--------------------------------------------------------------------------- ValueError Traceback (most recent call
last) in
----> 1 np.select(cond, val)
<array_function internals> in select(*args, **kwargs)
~/.local/lib/python3.8/site-packages/numpy/lib/function_base.py in
select(condlist, choicelist, default)
693 result_shape = condlist[0].shape
694 else:
--> 695 result_shape = np.broadcast_arrays(condlist[0], choicelist[0])[0].shape
696
697 result = np.full(result_shape, choicelist[-1], dtype)
<array_function internals> in broadcast_arrays(*args, **kwargs)
~/.local/lib/python3.8/site-packages/numpy/lib/stride_tricks.py in
broadcast_arrays(subok, *args)
256 args = [np.array(_m, copy=False, subok=subok) for _m in args]
257
--> 258 shape = _broadcast_shape(*args)
259
260 if all(array.shape == shape for array in args):
~/.local/lib/python3.8/site-packages/numpy/lib/stride_tricks.py in
_broadcast_shape(*args)
187 # use the old-iterator because np.nditer does not handle size 0 arrays
188 # consistently
--> 189 b = np.broadcast(*args[:32])
190 # unfortunately, it cannot handle 32 or more arguments directly
191 for pos in range(32, len(args), 31):
ValueError: shape mismatch: objects cannot be broadcast to a single
shape
Suggestions?
Thanks!
Somewhat cleaner way to do it (but very similar to your way) will be the following. Here's a simple example:
from sklearn.cluster import KMeans
import numpy as np
x1 = np.random.normal(0, 2, 100)
y1 = np.random.normal(0, 1, 100)
label1 = np.ones(100)
d1 = np.column_stack([x1, y1, label1])
x2 = np.random.normal(3, 1, 100)
y2 = np.random.normal(1, 2, 100)
label2 = np.ones(100) * 2
d2 = np.column_stack([x2, y2, label2])
x3 = np.random.normal(-3, 0.5, 100)
y3 = np.random.normal(0.5, 0.25, 100)
label3 = np.ones(100) * 3
d3 = np.column_stack([x3, y3, label3])
D = np.row_stack([d1, d2, d3])
np.random.shuffle(D)
X = D[:, :2]
y = D[:, 2]
print(f'X.shape = {X.shape}, y.shape = {y.shape}')
# X.shape = (300, 2), y.shape = (300,)
kmeans = KMeans(n_clusters = 3, init = 'random', n_init = 10, max_iter = 300)
# Train model on scaled features-
kmeans.fit(X)
preds = kmeans.predict(X)
X[preds==0] = kmeans.cluster_centers_[0]
X[preds==1] = kmeans.cluster_centers_[1]
X[preds==2] = kmeans.cluster_centers_[2]
Yet another way to accomplish the task is to use the np.put method instead of the assignment as follows:
np.put(X, preds==0, kmeans.cluster_centers_[0])
np.put(X, preds==1, kmeans.cluster_centers_[1])
np.put(X, preds==2, kmeans.cluster_centers_[2])
Frankly, I don't see a way to accomplish the task by the means of the np.select function, and I guess the way you do it is the best way, based on this answer.
Cheers.