TimeoutError: Worker failed to start - python-3.x

I work in a conda environment in a Jupyter notebook. When I try to create a client that uses processes, as follows:
from dask.distributed import Client, progress
client = Client(processes = True)
the following error occurs:
TimeoutError Traceback (most recent call last)
/home/vlad/anaconda3/lib/python3.6/site-packages/tornado/gen.py in
callback(f)
827 try:
--> 828 result_list.append(f.result())
829 except Exception as e:
/home/vlad/anaconda3/lib/python3.6/site-packages/tornado/concurrent.py
in result(self, timeout)
237 try:
--> 238 raise_exc_info(self._exc_info)
239 finally:
/home/vlad/anaconda3/lib/python3.6/site-packages/tornado/util.py in
raise_exc_info(exc_info)
/home/vlad/anaconda3/lib/python3.6/site-packages/tornado/gen.py in
run(self)
1068 else:
-> 1069 yielded = self.gen.send(value)
1070
/home/vlad/anaconda3/lib/python3.6/site-packages/distributed/deploy/local.py
in _start_worker(self, death_timeout, **kwargs)
228 self.workers.remove(w)
--> 229 raise gen.TimeoutError("Worker failed to start")
230
TimeoutError: Worker failed to start
During handling of the above exception, another exception occurred:
TimeoutError Traceback (most recent call last)
<ipython-input-26-9ebe205475b6> in <module>()
3
4 # Use all 8 cores
----> 5 cluster = LocalCluster(processes = True, n_workers = 4)
6 ##client = Client(processes = True)
7 ##client = Client('localhost:8789')
/home/vlad/anaconda3/lib/python3.6/site-packages/distributed/deploy/local.py
in __init__(self, n_workers, threads_per_worker, processes, loop,
start, ip, scheduler_port, silence_logs, diagnostics_port, services,
worker_services, service_kwargs, asynchronous, security,
**worker_kwargs)
140 self.worker_kwargs['security'] = security
141
--> 142 self.start(ip=ip, n_workers=n_workers)
143
144 clusters_to_close.add(self)
/home/vlad/anaconda3/lib/python3.6/site-packages/distributed/deploy/local.py
in start(self, **kwargs)
177 self._started = self._start(**kwargs)
178 else:
--> 179 self.sync(self._start, **kwargs)
180
181 #gen.coroutine
/home/vlad/anaconda3/lib/python3.6/site-packages/distributed/deploy/local.py
in sync(self, func, *args, **kwargs)
170 return future
171 else:
--> 172 return sync(self.loop, func, *args, **kwargs)
173
174 def start(self, **kwargs):
/home/vlad/anaconda3/lib/python3.6/site-packages/distributed/utils.py
in sync(loop, func, *args, **kwargs)
275 e.wait(10)
276 if error[0]:
--> 277 six.reraise(*error[0])
278 else:
279 return result[0]
/home/vlad/anaconda3/lib/python3.6/site-packages/six.py in reraise(tp, value, tb)
691 if value.__traceback__ is not tb:
692 raise value.with_traceback(tb)
--> 693 raise value
694 finally:
695 value = None
/home/vlad/anaconda3/lib/python3.6/site-packages/distributed/utils.py
in f()
260 if timeout is not None:
261 future = gen.with_timeout(timedelta(seconds=timeout), future)
--> 262 result[0] = yield future
263 except Exception as exc:
264 error[0] = sys.exc_info()
/home/vlad/anaconda3/lib/python3.6/site-packages/tornado/gen.py in
run(self)
1053
1054 try:
-> 1055 value = future.result()
1056 except Exception:
1057 self.had_exception = True
/home/vlad/anaconda3/lib/python3.6/site-packages/tornado/concurrent.py
in result(self, timeout)
236 if self._exc_info is not None:
237 try:
--> 238 raise_exc_info(self._exc_info)
239 finally:
240 self = None
/home/vlad/anaconda3/lib/python3.6/site-packages/tornado/util.py in
raise_exc_info(exc_info)
/home/vlad/anaconda3/lib/python3.6/site-packages/tornado/gen.py in
run(self)
1061 if exc_info is not None:
1062 try:
-> 1063 yielded = self.gen.throw(*exc_info)
1064 finally:
1065 # Break up a reference to itself
/home/vlad/anaconda3/lib/python3.6/site-packages/distributed/deploy/local.py
in _start(self, ip, n_workers)
197 self.scheduler.start(scheduler_address)
198
--> 199 yield [self._start_worker(**self.worker_kwargs) for i in range(n_workers)]
200
201 self.status = 'running'
/home/vlad/anaconda3/lib/python3.6/site-packages/tornado/gen.py in
run(self)
1053
1054 try:
-> 1055 value = future.result()
1056 except Exception:
1057 self.had_exception = True
/home/vlad/anaconda3/lib/python3.6/site-packages/tornado/concurrent.py
in result(self, timeout)
236 if self._exc_info is not None:
237 try:
--> 238 raise_exc_info(self._exc_info)
239 finally:
240 self = None
/home/vlad/anaconda3/lib/python3.6/site-packages/tornado/util.py in
raise_exc_info(exc_info)
/home/vlad/anaconda3/lib/python3.6/site-packages/tornado/gen.py in
callback(f)
826 for f in children:
827 try:
--> 828 result_list.append(f.result())
829 except Exception as e:
830 if future.done():
/home/vlad/anaconda3/lib/python3.6/site-packages/tornado/concurrent.py
in result(self, timeout)
236 if self._exc_info is not None:
237 try:
--> 238 raise_exc_info(self._exc_info)
239 finally:
240 self = None
/home/vlad/anaconda3/lib/python3.6/site-packages/tornado/util.py in
raise_exc_info(exc_info)
/home/vlad/anaconda3/lib/python3.6/site-packages/tornado/gen.py in
run(self)
1067 exc_info = None
1068 else:
-> 1069 yielded = self.gen.send(value)
1070
1071 if stack_context._state.contexts is not orig_stack_contexts:
/home/vlad/anaconda3/lib/python3.6/site-packages/distributed/deploy/local.py
in _start_worker(self, death_timeout, **kwargs)
227 if w.status == 'closed' and self.scheduler.status == 'running':
228 self.workers.remove(w)
--> 229 raise gen.TimeoutError("Worker failed to start")
230
231 raise gen.Return(w)
TimeoutError: Worker failed to start
How can I solve this problem? My setup is:
python version: 3.6.9
dask version: 1.1.4
distributed version: 1.26.0
tornado version: 4.5
conda version: 4.6.14

The problem was solved by updating the dask, distributed, and tornado packages to versions 2.4.0, 2.4.0, and 6.0.3 respectively.
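If you want to confirm the upgrade actually took effect before retrying, a minimal sanity check along these lines works (a sketch, nothing more; it just prints the installed versions and recreates the client from the question):
import dask
import distributed
import tornado
from dask.distributed import Client

# Confirm the environment picked up the updated packages.
print(dask.__version__, distributed.__version__, tornado.version)

# With the updated packages, the process-based client should start without
# hitting the "Worker failed to start" timeout.
client = Client(processes=True, n_workers=4)
print(client)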

Related

Colab IndexError: Target 255 is out of bounds

I'm trying to perform image semantic segmentation (segmenting mining fields) using lightning-flash. My images are all RGB/uint8/512x512 and the masks are L/uint8/512x512.
When I run the code, I get an error when fitting.
My code is this one:
import torch
import flash
from flash.image import SemanticSegmentation, SemanticSegmentationData
import os
from google.colab import drive
import ssl
drive.mount("/content/drive")
DATA_DIR = '/content/drive/MyDrive/data/'
x_train_dir = os.path.join(DATA_DIR, 'train_images')
y_train_dir = os.path.join(DATA_DIR, 'train_masks')
x_valid_dir = os.path.join(DATA_DIR, 'val_images')
y_valid_dir = os.path.join(DATA_DIR, 'val_masks')
x_test_dir = os.path.join(DATA_DIR, 'test_images')
y_test_dir = os.path.join(DATA_DIR, 'test_masks')
datamodule = SemanticSegmentationData.from_folders(
    train_folder=x_train_dir,
    train_target_folder=y_train_dir,
    val_folder=x_valid_dir,
    val_target_folder=y_valid_dir,
    test_folder=x_test_dir,
    test_target_folder=y_test_dir,
    transform_kwargs=dict(image_size=(256, 256)),
    num_classes=1,
    batch_size=16,
)
# avoid ssl error
ssl._create_default_https_context = ssl._create_unverified_context
model = SemanticSegmentation(
    head="unetplusplus",
    backbone="densenet169",
    pretrained="imagenet",
    num_classes=datamodule.num_classes
)
GPUS = torch.cuda.device_count()
if GPUS > 0:
    trainer = flash.Trainer(max_epochs=2, gpus=torch.cuda.device_count())
else:
    trainer = flash.Trainer(max_epochs=2)
trainer.finetune(model, datamodule=datamodule, strategy="freeze")
trainer.save_checkpoint("semantic_segmentation_model.pt")
When I run the code, I get this error:
IndexError Traceback (most recent call last)
<ipython-input-7-11e2ce087ca0> in <module>
6
7 #trainer.fit(model, datamodule=datamodule)
----> 8 trainer.finetune(model, datamodule=datamodule, strategy="freeze")
9 trainer.save_checkpoint("semantic_segmentation_model.pt")
19 frames
/usr/local/lib/python3.7/dist-packages/flash/core/trainer.py in finetune(self, model, train_dataloader, val_dataloaders, datamodule, strategy, train_bn)
162 """
163 self._resolve_callbacks(model, strategy, train_bn=train_bn)
--> 164 return super().fit(model, train_dataloader, val_dataloaders, datamodule)
165
166 def predict(
/usr/local/lib/python3.7/dist-packages/pytorch_lightning/trainer/trainer.py in fit(self, model, train_dataloaders, val_dataloaders, datamodule, ckpt_path)
695 self.strategy.model = model
696 self._call_and_handle_interrupt(
--> 697 self._fit_impl, model, train_dataloaders, val_dataloaders, datamodule, ckpt_path
698 )
699
/usr/local/lib/python3.7/dist-packages/pytorch_lightning/trainer/trainer.py in _call_and_handle_interrupt(self, trainer_fn, *args, **kwargs)
648 return self.strategy.launcher.launch(trainer_fn, *args, trainer=self, **kwargs)
649 else:
--> 650 return trainer_fn(*args, **kwargs)
651 # TODO(awaelchli): Unify both exceptions below, where `KeyboardError` doesn't re-raise
652 except KeyboardInterrupt as exception:
/usr/local/lib/python3.7/dist-packages/pytorch_lightning/trainer/trainer.py in _fit_impl(self, model, train_dataloaders, val_dataloaders, datamodule, ckpt_path)
735 ckpt_path, model_provided=True, model_connected=self.lightning_module is not None
736 )
--> 737 results = self._run(model, ckpt_path=self.ckpt_path)
738
739 assert self.state.stopped
/usr/local/lib/python3.7/dist-packages/pytorch_lightning/trainer/trainer.py in _run(self, model, ckpt_path)
1166 self._checkpoint_connector.resume_end()
1167
-> 1168 results = self._run_stage()
1169
1170 log.detail(f"{self.__class__.__name__}: trainer tearing down")
/usr/local/lib/python3.7/dist-packages/pytorch_lightning/trainer/trainer.py in _run_stage(self)
1252 if self.predicting:
1253 return self._run_predict()
-> 1254 return self._run_train()
1255
1256 def _pre_training_routine(self):
/usr/local/lib/python3.7/dist-packages/pytorch_lightning/trainer/trainer.py in _run_train(self)
1274
1275 with isolate_rng():
-> 1276 self._run_sanity_check()
1277
1278 # enable train mode
/usr/local/lib/python3.7/dist-packages/pytorch_lightning/trainer/trainer.py in _run_sanity_check(self)
1343 # run eval step
1344 with torch.no_grad():
-> 1345 val_loop.run()
1346
1347 self._call_callback_hooks("on_sanity_check_end")
/usr/local/lib/python3.7/dist-packages/pytorch_lightning/loops/loop.py in run(self, *args, **kwargs)
198 try:
199 self.on_advance_start(*args, **kwargs)
--> 200 self.advance(*args, **kwargs)
201 self.on_advance_end()
202 self._restarting = False
/usr/local/lib/python3.7/dist-packages/pytorch_lightning/loops/dataloader/evaluation_loop.py in advance(self, *args, **kwargs)
153 if self.num_dataloaders > 1:
154 kwargs["dataloader_idx"] = dataloader_idx
--> 155 dl_outputs = self.epoch_loop.run(self._data_fetcher, dl_max_batches, kwargs)
156
157 # store batch level output per dataloader
/usr/local/lib/python3.7/dist-packages/pytorch_lightning/loops/loop.py in run(self, *args, **kwargs)
198 try:
199 self.on_advance_start(*args, **kwargs)
--> 200 self.advance(*args, **kwargs)
201 self.on_advance_end()
202 self._restarting = False
/usr/local/lib/python3.7/dist-packages/pytorch_lightning/loops/epoch/evaluation_epoch_loop.py in advance(self, data_fetcher, dl_max_batches, kwargs)
141
142 # lightning module methods
--> 143 output = self._evaluation_step(**kwargs)
144 output = self._evaluation_step_end(output)
145
/usr/local/lib/python3.7/dist-packages/pytorch_lightning/loops/epoch/evaluation_epoch_loop.py in _evaluation_step(self, **kwargs)
238 """
239 hook_name = "test_step" if self.trainer.testing else "validation_step"
--> 240 output = self.trainer._call_strategy_hook(hook_name, *kwargs.values())
241
242 return output
/usr/local/lib/python3.7/dist-packages/pytorch_lightning/trainer/trainer.py in _call_strategy_hook(self, hook_name, *args, **kwargs)
1704
1705 with self.profiler.profile(f"[Strategy]{self.strategy.__class__.__name__}.{hook_name}"):
-> 1706 output = fn(*args, **kwargs)
1707
1708 # restore current_fx when nested context
/usr/local/lib/python3.7/dist-packages/pytorch_lightning/strategies/strategy.py in validation_step(self, *args, **kwargs)
368 with self.precision_plugin.val_step_context():
369 assert isinstance(self.model, ValidationStep)
--> 370 return self.model.validation_step(*args, **kwargs)
371
372 def test_step(self, *args: Any, **kwargs: Any) -> Optional[STEP_OUTPUT]:
/usr/local/lib/python3.7/dist-packages/flash/image/segmentation/model.py in validation_step(self, batch, batch_idx)
151 def validation_step(self, batch: Any, batch_idx: int) -> Any:
152 batch = (batch[DataKeys.INPUT], batch[DataKeys.TARGET])
--> 153 return super().validation_step(batch, batch_idx)
154
155 def test_step(self, batch: Any, batch_idx: int) -> Any:
/usr/local/lib/python3.7/dist-packages/flash/core/model.py in validation_step(self, batch, batch_idx)
423
424 def validation_step(self, batch: Any, batch_idx: int) -> None:
--> 425 output = self.step(batch, batch_idx, self.val_metrics)
426 log_kwargs = {"batch_size": output.get(OutputKeys.BATCH_SIZE, None)} if _PL_GREATER_EQUAL_1_5_0 else {}
427 self.log_dict(
/usr/local/lib/python3.7/dist-packages/flash/core/model.py in step(self, batch, batch_idx, metrics)
360 output = {OutputKeys.OUTPUT: y_hat}
361 y_hat = self.to_loss_format(output[OutputKeys.OUTPUT])
--> 362 losses = {name: l_fn(y_hat, y) for name, l_fn in self.loss_fn.items()}
363
364 y_hat = self.to_metrics_format(output[OutputKeys.OUTPUT])
/usr/local/lib/python3.7/dist-packages/flash/core/model.py in <dictcomp>(.0)
360 output = {OutputKeys.OUTPUT: y_hat}
361 y_hat = self.to_loss_format(output[OutputKeys.OUTPUT])
--> 362 losses = {name: l_fn(y_hat, y) for name, l_fn in self.loss_fn.items()}
363
364 y_hat = self.to_metrics_format(output[OutputKeys.OUTPUT])
/usr/local/lib/python3.7/dist-packages/torch/nn/functional.py in cross_entropy(input, target, weight, size_average, ignore_index, reduce, reduction, label_smoothing)
3012 if size_average is not None or reduce is not None:
3013 reduction = _Reduction.legacy_get_string(size_average, reduce)
-> 3014 return torch._C._nn.cross_entropy_loss(input, target, weight, _Reduction.get_enum(reduction), ignore_index, label_smoothing)
3015
3016
IndexError: Target 255 is out of bounds.
How can I solve this problem? I researched other issues on Stack Overflow and they were all related to the number of classes. But in my case, I only want to segment mining fields.
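The traceback ends in torch.nn.functional.cross_entropy, which only accepts target values in the range [0, num_classes - 1], so a target of 255 suggests the loss is seeing raw mask pixel values rather than class indices. A small diagnostic sketch like the following (the file name is hypothetical; substitute any mask from y_train_dir) shows what values the masks actually contain:
import numpy as np
from PIL import Image

# Hypothetical example file; point this at any mask in y_train_dir.
mask = np.array(Image.open("/content/drive/MyDrive/data/train_masks/example.png"))
print(mask.dtype, mask.shape)
# cross_entropy expects class indices in [0, num_classes - 1];
# values of 255 here would explain the IndexError.
print(np.unique(mask))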

Error using tfds.load on Tensorflow Dataset

I was wondering if the TensorFlow 2.2 dataset support has an issue in the Windows release.
Here is my diagnostic code:
import numpy as np
import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_datasets as tfds
print("Version: ", tf.__version__)
print("Eager mode: ", tf.executing_eagerly())
print("Hub version: ", hub.__version__)
print("GPU is", "available" if tf.config.experimental.list_physical_devices("GPU") else "NOT AVAILABLE")
Version: 2.2.0
Eager mode: True
Hub version: 0.8.0
GPU is available
I can load the list of datasets
tfds.list_builders()
['abstract_reasoning',
'aeslc',
'aflw2k3d',
'amazon_us_reviews',
'anli',
.
.
.
'xnli',
'xsum',
'yelp_polarity_reviews']
However, I am unable to load any dataset
imdb, info = tfds.load('imdb_reviews', with_info=True, as_supervised=True)
I receive the following errors
---------------------------------------------------------------------------
UnimplementedError Traceback (most recent call last)
c:\python37\lib\site-packages\tensorflow_datasets\core\utils\py_utils.py in try_reraise(*args, **kwargs)
398 try:
--> 399 yield
400 except Exception: # pylint: disable=broad-except
c:\python37\lib\site-packages\tensorflow_datasets\core\registered.py in builder(name, **builder_init_kwargs)
243 prefix="Failed to construct dataset {}".format(name)):
--> 244 return builder_cls(name)(**builder_kwargs)
245
c:\python37\lib\site-packages\wrapt\wrappers.py in __call__(self, *args, **kwargs)
602 return self._self_wrapper(self.__wrapped__, self._self_instance,
--> 603 args, kwargs)
604
c:\python37\lib\site-packages\tensorflow_datasets\core\api_utils.py in disallow_positional_args_dec(fn, instance, args, kwargs)
68 _check_required(fn, kwargs)
---> 69 return fn(*args, **kwargs)
70
c:\python37\lib\site-packages\tensorflow_datasets\core\dataset_builder.py in __init__(self, data_dir, config, version)
205 else: # Use the code version (do not restore data)
--> 206 self.info.initialize_from_bucket()
207
c:\python37\lib\site-packages\tensorflow_datasets\core\dataset_info.py in initialize_from_bucket(self)
422 tmp_dir = tempfile.mkdtemp("tfds")
--> 423 data_files = gcs_utils.gcs_dataset_info_files(self.full_name)
424 if not data_files:
c:\python37\lib\site-packages\tensorflow_datasets\core\utils\gcs_utils.py in gcs_dataset_info_files(dataset_dir)
69 """Return paths to GCS files in the given dataset directory."""
---> 70 return gcs_listdir(posixpath.join(GCS_DATASET_INFO_DIR, dataset_dir))
71
c:\python37\lib\site-packages\tensorflow_datasets\core\utils\gcs_utils.py in gcs_listdir(dir_name)
62 root_dir = gcs_path(dir_name)
---> 63 if _is_gcs_disabled or not tf.io.gfile.exists(root_dir):
64 return None
c:\python37\lib\site-packages\tensorflow\python\lib\io\file_io.py in file_exists_v2(path)
266 try:
--> 267 _pywrap_file_io.FileExists(compat.as_bytes(path))
268 except errors.NotFoundError:
UnimplementedError: File system scheme 'gs' not implemented (file: 'gs://tfds-data/dataset_info/imdb_reviews/plain_text/1.0.0')
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
<ipython-input-36-06930b64f980> in <module>
1 #tfds.list_builders()
----> 2 imdb, info = tfds.load('imdb_reviews', with_info=True, as_supervised=True)
c:\python37\lib\site-packages\wrapt\wrappers.py in __call__(self, *args, **kwargs)
562
563 return self._self_wrapper(self.__wrapped__, self._self_instance,
--> 564 args, kwargs)
565
566 class BoundFunctionWrapper(_FunctionWrapperBase):
c:\python37\lib\site-packages\tensorflow_datasets\core\api_utils.py in disallow_positional_args_dec(fn, instance, args, kwargs)
67 _check_no_positional(fn, args, ismethod, allowed=allowed)
68 _check_required(fn, kwargs)
---> 69 return fn(*args, **kwargs)
70
71 return disallow_positional_args_dec(wrapped) # pylint: disable=no-value-for-parameter
c:\python37\lib\site-packages\tensorflow_datasets\core\registered.py in load(name, split, data_dir, batch_size, shuffle_files, download, as_supervised, decoders, read_config, with_info, builder_kwargs, download_and_prepare_kwargs, as_dataset_kwargs, try_gcs)
366 data_dir = constants.DATA_DIR
367
--> 368 dbuilder = builder(name, data_dir=data_dir, **builder_kwargs)
369 if download:
370 download_and_prepare_kwargs = download_and_prepare_kwargs or {}
c:\python37\lib\site-packages\tensorflow_datasets\core\registered.py in builder(name, **builder_init_kwargs)
242 with py_utils.try_reraise(
243 prefix="Failed to construct dataset {}".format(name)):
--> 244 return builder_cls(name)(**builder_kwargs)
245
246
c:\python37\lib\contextlib.py in __exit__(self, type, value, traceback)
128 value = type()
129 try:
--> 130 self.gen.throw(type, value, traceback)
131 except StopIteration as exc:
132 # Suppress StopIteration *unless* it's the same exception that
c:\python37\lib\site-packages\tensorflow_datasets\core\utils\py_utils.py in try_reraise(*args, **kwargs)
399 yield
400 except Exception: # pylint: disable=broad-except
--> 401 reraise(*args, **kwargs)
402
403
c:\python37\lib\site-packages\tensorflow_datasets\core\utils\py_utils.py in reraise(prefix, suffix)
390 suffix = '\n' + suffix if suffix else ''
391 msg = prefix + str(exc_value) + suffix
--> 392 six.reraise(exc_type, exc_type(msg), exc_traceback)
393
394
TypeError: __init__() missing 2 required positional arguments: 'op' and 'message'
Is the library broken? As mentioned, I am on a Windows 10 machine and using JupyterLab.
After I reported the issue on GitHub, the problem was fixed in version 3.2.1.
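For anyone stuck on an older tensorflow-datasets release, the traceback shows that gcs_utils checks a module-level _is_gcs_disabled flag before touching the gs:// path, so flipping that flag is a possible stop-gap. It relies on a private attribute, so treat it as a temporary workaround rather than a supported API:
import tensorflow_datasets as tfds
from tensorflow_datasets.core.utils import gcs_utils

# Skip the GCS metadata lookup that fails with "File system scheme 'gs'
# not implemented" on this setup. Private flag; older tfds versions only.
gcs_utils._is_gcs_disabled = True

imdb, info = tfds.load('imdb_reviews', with_info=True, as_supervised=True)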

dask: read parquet from Azure blob - AzureHttpError

I created a parquet file in an Azure blob using dask.dataframe.to_parquet (Moving data from a database to Azure blob storage).
I would now like to read that file. I'm doing:
import dask.dataframe as dd

STORAGE_OPTIONS = {'account_name': 'ACCOUNT_NAME',
                   'account_key': 'ACCOUNT_KEY'}
df = dd.read_parquet('abfs://BLOB/FILE.parquet', storage_options=STORAGE_OPTIONS)
but I get an AzureHttpError:
---------------------------------------------------------------------------
AzureHttpError Traceback (most recent call last)
<ipython-input-4-2184e772e417> in <module>
3 'account_key': 'ACCOUNT_KEY'}
4
----> 5 df = dd.read_parquet('abfs://BLOB/FILE', storage_options=STORAGE_OPTIONS)
~\AppData\Local\Continuum\anaconda3\lib\site-packages\dask\dataframe\io\parquet\core.py in read_parquet(path, columns, filters, categories, index, storage_options, engine, gather_statistics, split_row_groups, chunksize, **kwargs)
231 filters=filters,
232 split_row_groups=split_row_groups,
--> 233 **kwargs
234 )
235 if meta.index.name is not None:
~\AppData\Local\Continuum\anaconda3\lib\site-packages\dask\dataframe\io\parquet\fastparquet.py in read_metadata(fs, paths, categories, index, gather_statistics, filters, **kwargs)
176 # correspond to a row group (populated below).
177 parts, pf, gather_statistics, fast_metadata = _determine_pf_parts(
--> 178 fs, paths, gather_statistics, **kwargs
179 )
180
~\AppData\Local\Continuum\anaconda3\lib\site-packages\dask\dataframe\io\parquet\fastparquet.py in _determine_pf_parts(fs, paths, gather_statistics, **kwargs)
127 open_with=fs.open,
128 sep=fs.sep,
--> 129 **kwargs.get("file", {})
130 )
131 if gather_statistics is None:
~\AppData\Local\Continuum\anaconda3\lib\site-packages\fastparquet\api.py in __init__(self, fn, verify, open_with, root, sep)
109 fn2 = join_path(fn, '_metadata')
110 self.fn = fn2
--> 111 with open_with(fn2, 'rb') as f:
112 self._parse_header(f, verify)
113 fn = fn2
~\AppData\Local\Continuum\anaconda3\lib\site-packages\fsspec\spec.py in open(self, path, mode, block_size, cache_options, **kwargs)
722 autocommit=ac,
723 cache_options=cache_options,
--> 724 **kwargs
725 )
726 if not ac:
~\AppData\Local\Continuum\anaconda3\lib\site-packages\adlfs\core.py in _open(self, path, mode, block_size, autocommit, cache_options, **kwargs)
552 autocommit=autocommit,
553 cache_options=cache_options,
--> 554 **kwargs,
555 )
556
~\AppData\Local\Continuum\anaconda3\lib\site-packages\adlfs\core.py in __init__(self, fs, path, mode, block_size, autocommit, cache_type, cache_options, **kwargs)
582 cache_type=cache_type,
583 cache_options=cache_options,
--> 584 **kwargs,
585 )
586
~\AppData\Local\Continuum\anaconda3\lib\site-packages\fsspec\spec.py in __init__(self, fs, path, mode, block_size, autocommit, cache_type, cache_options, **kwargs)
954 if mode == "rb":
955 if not hasattr(self, "details"):
--> 956 self.details = fs.info(path)
957 self.size = self.details["size"]
958 self.cache = caches[cache_type](
~\AppData\Local\Continuum\anaconda3\lib\site-packages\fsspec\spec.py in info(self, path, **kwargs)
499 if out:
500 return out[0]
--> 501 out = self.ls(path, detail=True, **kwargs)
502 path = path.rstrip("/")
503 out1 = [o for o in out if o["name"].rstrip("/") == path]
~\AppData\Local\Continuum\anaconda3\lib\site-packages\adlfs\core.py in ls(self, path, detail, invalidate_cache, delimiter, **kwargs)
446 # then return the contents
447 elif self._matches(
--> 448 container_name, path, as_directory=True, delimiter=delimiter
449 ):
450 logging.debug(f"{path} appears to be a directory")
~\AppData\Local\Continuum\anaconda3\lib\site-packages\adlfs\core.py in _matches(self, container_name, path, as_directory, delimiter)
386 prefix=path,
387 delimiter=delimiter,
--> 388 num_results=None,
389 )
390
~\AppData\Local\Continuum\anaconda3\lib\site-packages\azure\storage\blob\baseblobservice.py in list_blob_names(self, container_name, prefix, num_results, include, delimiter, marker, timeout)
1360 '_context': operation_context,
1361 '_converter': _convert_xml_to_blob_name_list}
-> 1362 resp = self._list_blobs(*args, **kwargs)
1363
1364 return ListGenerator(resp, self._list_blobs, args, kwargs)
~\AppData\Local\Continuum\anaconda3\lib\site-packages\azure\storage\blob\baseblobservice.py in _list_blobs(self, container_name, prefix, marker, max_results, include, delimiter, timeout, _context, _converter)
1435 }
1436
-> 1437 return self._perform_request(request, _converter, operation_context=_context)
1438
1439 def get_blob_account_information(self, container_name=None, blob_name=None, timeout=None):
~\AppData\Local\Continuum\anaconda3\lib\site-packages\azure\storage\common\storageclient.py in _perform_request(self, request, parser, parser_args, operation_context, expected_errors)
444 status_code,
445 exception_str_in_one_line)
--> 446 raise ex
447 finally:
448 # If this is a location locked operation and the location is not set,
~\AppData\Local\Continuum\anaconda3\lib\site-packages\azure\storage\common\storageclient.py in _perform_request(self, request, parser, parser_args, operation_context, expected_errors)
372 except AzureException as ex:
373 retry_context.exception = ex
--> 374 raise ex
375 except Exception as ex:
376 retry_context.exception = ex
~\AppData\Local\Continuum\anaconda3\lib\site-packages\azure\storage\common\storageclient.py in _perform_request(self, request, parser, parser_args, operation_context, expected_errors)
358 # and raised as an azure http exception
359 _http_error_handler(
--> 360 HTTPError(response.status, response.message, response.headers, response.body))
361
362 # Parse the response
~\AppData\Local\Continuum\anaconda3\lib\site-packages\azure\storage\common\_error.py in _http_error_handler(http_error)
113 ex.error_code = error_code
114
--> 115 raise ex
116
117
AzureHttpError: Server encountered an internal error. Please try again after some time. ErrorCode: InternalError
<?xml version="1.0" encoding="utf-8"?><Error><Code>InternalError</Code><Message>Server encountered an internal error. Please try again after some time.
RequestId:...
Time:2020-04-15T02:44:06.8611398Z</Message></Error>
The text of the error suggests that the service was temporarily down. If it persists, you may want to open an issue on the adlfs repository; perhaps the fix could be as simple as more thorough retry logic on their end.
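As a client-side stop-gap, a simple retry loop around the read is one way to ride out transient server errors (a sketch; the attempt count and delay are arbitrary):
import time
import dask.dataframe as dd

def read_parquet_with_retries(path, storage_options, attempts=5, delay=10):
    # Retry a flaky read a few times, re-raising on the final failure.
    for attempt in range(1, attempts + 1):
        try:
            return dd.read_parquet(path, storage_options=storage_options)
        except Exception as exc:  # AzureHttpError's import path varies by SDK version
            if attempt == attempts:
                raise
            print(f"Attempt {attempt} failed ({exc}); retrying in {delay}s")
            time.sleep(delay)

df = read_parquet_with_retries('abfs://BLOB/FILE.parquet', STORAGE_OPTIONS)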

folium heatmap with `Object of type 'int64' is not JSON serializable` error

I got the error "Object of type 'int64' is not JSON serializable" when I tried to generate a heatmap with folium. I am running my Jupyter notebook in Anaconda using Python 3.6, and my folium version is 0.9.1.
The dtypes of df_2y_cons_LatLo are: Latitude float64, Longitude float64, Descriptor int64.
import folium
from folium.plugins import HeatMap

def generateBaseMap(default_location=[40.704652, -73.923688], default_zoom_start=11):
    base_map = folium.Map(location=default_location, control_scale=True, zoom_start=default_zoom_start)
    return base_map

base_map = generateBaseMap()
hm = HeatMap(list(zip(df_2y_cons_LatLo.Latitude.values, df_2y_cons_LatLo.Longitude.values, df_2y_cons_LatLo.Descriptor.values)))
base_map.add_child(hm)
I expected a heatmap to be displayed, but when I run the above code, I get the following error:
TypeError Traceback (most recent call last)
~\Anaconda3\lib\site-packages\IPython\core\formatters.py in __call__(self, obj)
343 method = get_real_method(obj, self.print_method)
344 if method is not None:
--> 345 return method()
346 return None
347 else:
~\Anaconda3\lib\site-packages\folium\folium.py in _repr_html_(self, **kwargs)
291 self._parent = None
292 else:
--> 293 out = self._parent._repr_html_(**kwargs)
294 return out
295
~\Anaconda3\lib\site-packages\branca\element.py in _repr_html_(self, **kwargs)
326
327 """
--> 328 html = self.render(**kwargs)
329 html = "data:text/html;charset=utf-8;base64," + base64.b64encode(html.encode('utf8')).decode('utf8') # noqa
330
~\Anaconda3\lib\site-packages\branca\element.py in render(self, **kwargs)
319 """Renders the HTML representation of the element."""
320 for name, child in self._children.items():
--> 321 child.render(**kwargs)
322 return self._template.render(this=self, kwargs=kwargs)
323
~\Anaconda3\lib\site-packages\folium\folium.py in render(self, **kwargs)
368 '</style>'), name='map_style')
369
--> 370 super(Map, self).render(**kwargs)
371
372 def fit_bounds(self, bounds, padding_top_left=None,
~\Anaconda3\lib\site-packages\branca\element.py in render(self, **kwargs)
631
632 for name, element in self._children.items():
--> 633 element.render(**kwargs)
~\Anaconda3\lib\site-packages\folium\plugins\heat_map.py in render(self, **kwargs)
79
80 def render(self, **kwargs):
---> 81 super(HeatMap, self).render(**kwargs)
82
83 figure = self.get_root()
~\Anaconda3\lib\site-packages\branca\element.py in render(self, **kwargs)
627 script = self._template.module.__dict__.get('script', None)
628 if script is not None:
--> 629 figure.script.add_child(Element(script(self, kwargs)),
630 name=self.get_name())
631
~\Anaconda3\lib\site-packages\jinja2\runtime.py in __call__(self, *args, **kwargs)
573 (self.name, len(self.arguments)))
574
--> 575 return self._invoke(arguments, autoescape)
576
577 def _invoke(self, arguments, autoescape):
~\Anaconda3\lib\site-packages\jinja2\asyncsupport.py in _invoke(self, arguments, autoescape)
108 def _invoke(self, arguments, autoescape):
109 if not self._environment.is_async:
--> 110 return original_invoke(self, arguments, autoescape)
111 return async_invoke(self, arguments, autoescape)
112 return update_wrapper(_invoke, original_invoke)
~\Anaconda3\lib\site-packages\jinja2\runtime.py in _invoke(self, arguments, autoescape)
577 def _invoke(self, arguments, autoescape):
578 """This method is being swapped out by the async implementation."""
--> 579 rv = self._func(*arguments)
580 if autoescape:
581 rv = Markup(rv)
<template> in macro(l_1_this, l_1_kwargs)
~\Anaconda3\lib\site-packages\jinja2\filters.py in do_tojson(eval_ctx, value, indent)
1076 options = dict(options)
1077 options['indent'] = indent
-> 1078 return htmlsafe_json_dumps(value, dumper=dumper, **options)
1079
1080
~\Anaconda3\lib\site-packages\jinja2\utils.py in htmlsafe_json_dumps(obj, dumper, **kwargs)
563 if dumper is None:
564 dumper = json.dumps
--> 565 rv = dumper(obj, **kwargs) \
566 .replace(u'<', u'\\u003c') \
567 .replace(u'>', u'\\u003e') \
~\Anaconda3\lib\json\__init__.py in dumps(obj, skipkeys, ensure_ascii, check_circular, allow_nan, cls, indent, separators, default, sort_keys, **kw)
236 check_circular=check_circular, allow_nan=allow_nan, indent=indent,
237 separators=separators, default=default, sort_keys=sort_keys,
--> 238 **kw).encode(obj)
239
240
~\Anaconda3\lib\json\encoder.py in encode(self, o)
197 # exceptions aren't as detailed. The list call should be roughly
198 # equivalent to the PySequence_Fast that ''.join() would do.
--> 199 chunks = self.iterencode(o, _one_shot=True)
200 if not isinstance(chunks, (list, tuple)):
201 chunks = list(chunks)
~\Anaconda3\lib\json\encoder.py in iterencode(self, o, _one_shot)
255 self.key_separator, self.item_separator, self.sort_keys,
256 self.skipkeys, _one_shot)
--> 257 return _iterencode(o, 0)
258
259 def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
~\Anaconda3\lib\json\encoder.py in default(self, o)
178 """
179 raise TypeError("Object of type '%s' is not JSON serializable" %
--> 180 o.__class__.__name__)
181
182 def encode(self, o):
TypeError: Object of type 'int64' is not JSON serializable
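The traceback bottoms out in the standard-library json encoder, which does not know how to serialise NumPy int64 scalars. One workaround (a sketch that reuses the dataframe and helper names from the question) is to cast everything to built-in Python floats before handing the data to HeatMap:
import folium
from folium.plugins import HeatMap

# Cast NumPy scalars to plain Python floats so the JSON encoder used by
# folium/branca can serialise them.
data = [
    [float(lat), float(lon), float(weight)]
    for lat, lon, weight in zip(
        df_2y_cons_LatLo.Latitude.values,
        df_2y_cons_LatLo.Longitude.values,
        df_2y_cons_LatLo.Descriptor.values,
    )
]

base_map = generateBaseMap()
base_map.add_child(HeatMap(data))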

pyspark ibmdbpy giving ValueError: ('ordinal must be >= 1

I am loading data from Netezza into a dataframe and then trying to write it to dashDB. I am using ibmdbpy to load the data into dashDB on Bluemix. ibmdbpy requires a pandas DataFrame, so I convert the Spark DataFrame to pandas before loading it into dashDB.
all_disputes_df = sqlContext.read.format('jdbc').options(url='jdbc:netezza://pda1-wall.pok.ibm.com:5480/BACC_PRD_ISCNZ_GAPNZ', user=user, password=password, dbtable='METRICS.AR_EM_D2_02_AGG', driver='org.netezza.Driver').load()
from ibmdbpy import IdaDataBase
idadb = IdaDataBase(dsn='BLUDB', uid='dash107474', pwd='k5TY24AbzFjE')
print("current_schema is %s" % idadb.current_schema)
print("tables %s" % idadb.show_tables())
idadb.as_idadataframe(all_disputes_df.toPandas(), "all_disputes")
I am getting the following traceback.
ValueError Traceback (most recent call last)
<ipython-input-4-63dde713c67b> in <module>()
----> 1 idadb.as_idadataframe(all_disputes_df.toPandas(), "all_disputes")
/home/brente/spark/spark-1.6.1-bin-hadoop2.6/python/pyspark/sql/dataframe.pyc in toPandas(self)
1379 """
1380 import pandas as pd
-> 1381 return pd.DataFrame.from_records(self.collect(), columns=self.columns)
1382
1383 ##########################################################################################
/home/brente/spark/spark-1.6.1-bin-hadoop2.6/python/pyspark/sql/dataframe.pyc in collect(self)
279 with SCCallSiteSync(self._sc) as css:
280 port = self._jdf.collectToPython()
--> 281 return list(_load_from_socket(port, BatchedSerializer(PickleSerializer())))
282
283 #ignore_unicode_prefix
/home/brente/spark/spark-1.6.1-bin-hadoop2.6/python/pyspark/rdd.pyc in _load_from_socket(port, serializer)
140 try:
141 rf = sock.makefile("rb", 65536)
--> 142 for item in serializer.load_stream(rf):
143 yield item
144 finally:
/home/brente/spark/spark-1.6.1-bin-hadoop2.6/python/pyspark/serializers.pyc in load_stream(self, stream)
137 while True:
138 try:
--> 139 yield self._read_with_length(stream)
140 except EOFError:
141 return
/home/brente/spark/spark-1.6.1-bin-hadoop2.6/python/pyspark/serializers.pyc in _read_with_length(self, stream)
162 if len(obj) < length:
163 raise EOFError
--> 164 return self.loads(obj)
165
166 def dumps(self, obj):
/home/brente/spark/spark-1.6.1-bin-hadoop2.6/python/pyspark/serializers.pyc in loads(self, obj, encoding)
420 else:
421 def loads(self, obj, encoding=None):
--> 422 return pickle.loads(obj)
423
424
/home/brente/spark/spark-1.6.1-bin-hadoop2.6/python/pyspark/sql/types.pyc in <lambda>(*a)
1157 # This is used to unpickle a Row from JVM
1158 def _create_row_inbound_converter(dataType):
-> 1159 return lambda *a: dataType.fromInternal(a)
1160
1161
/home/brente/spark/spark-1.6.1-bin-hadoop2.6/python/pyspark/sql/types.pyc in fromInternal(self, obj)
563 return obj
564 if self._needSerializeAnyField:
--> 565 values = [f.fromInternal(v) for f, v in zip(self.fields, obj)]
566 else:
567 values = obj
/home/brente/spark/spark-1.6.1-bin-hadoop2.6/python/pyspark/sql/types.pyc in fromInternal(self, obj)
436
437 def fromInternal(self, obj):
--> 438 return self.dataType.fromInternal(obj)
439
440
/home/brente/spark/spark-1.6.1-bin-hadoop2.6/python/pyspark/sql/types.pyc in fromInternal(self, v)
174 def fromInternal(self, v):
175 if v is not None:
--> 176 return datetime.date.fromordinal(v + self.EPOCH_ORDINAL)
177
178
ValueError: ('ordinal must be >= 1', <function <lambda> at 0x7f97c0be76e0>, (u'788', u'10', u'00620000 ', u'0129101548 ', 1, u'000028628 ', 16520, Decimal('2124.76'), Decimal('2124.76'), 16525, 16525, u'000611099
Any ideas on what the problem is?
Reading your data from Netezza into a DataFrame is what fails; everything beyond that is speculation on my side:
Could there be invalid data stored in Netezza that throws off the deserialization into a DataFrame?
Maybe try some other queries to make sure there is no connectivity problem, no typo in the database name, things like that.
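For example, since the traceback shows the failure happens while deserialising a date column during collect(), you could pull a small slice of the same table and call toPandas() on it directly. A sketch reusing the connection details from the question (the subquery-as-dbtable trick and the LIMIT clause are assumptions about your Netezza/JDBC setup):
# If toPandas() already fails on a handful of rows, suspect bad date values
# in the table rather than connectivity.
sample_df = sqlContext.read.format('jdbc').options(
    url='jdbc:netezza://pda1-wall.pok.ibm.com:5480/BACC_PRD_ISCNZ_GAPNZ',
    user=user, password=password,
    dbtable='(SELECT * FROM METRICS.AR_EM_D2_02_AGG LIMIT 100) AS sample',
    driver='org.netezza.Driver',
).load()

print(sample_df.count())
pdf = sample_df.toPandas()
print(pdf.dtypes)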
