I am wondering whether TensorFlow Datasets has an issue with TensorFlow 2.2 on Windows.
Here is my diagnostic code:
import numpy as np
import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_datasets as tfds
# Report the library versions and execution mode relevant to this issue.
print("Version: ", tf.__version__)
print("Eager mode: ", tf.executing_eagerly())
print("Hub version: ", hub.__version__)
# `tf.config.list_physical_devices` is the stable spelling of the older
# `tf.config.experimental` alias; an empty (falsy) list means no GPU is visible.
print("GPU is", "available" if tf.config.list_physical_devices("GPU") else "NOT AVAILABLE")
Version: 2.2.0
Eager mode: True
Hub version: 0.8.0
GPU is available
I can load the list of datasets
# List the names of the dataset builders known to tensorflow_datasets.
tfds.list_builders()
['abstract_reasoning',
'aeslc',
'aflw2k3d',
'amazon_us_reviews',
'anli',
.
.
.
'xnli',
'xsum',
'yelp_polarity_reviews']
However, I am unable to load any dataset
# with_info=True makes tfds.load return a (dataset, info) pair, unpacked here;
# as_supervised=True requests (input, label) tuples instead of feature dicts.
imdb, info = tfds.load('imdb_reviews', with_info=True, as_supervised=True)
I receive the following errors
---------------------------------------------------------------------------
UnimplementedError Traceback (most recent call last)
c:\python37\lib\site-packages\tensorflow_datasets\core\utils\py_utils.py in try_reraise(*args, **kwargs)
398 try:
--> 399 yield
400 except Exception: # pylint: disable=broad-except
c:\python37\lib\site-packages\tensorflow_datasets\core\registered.py in builder(name, **builder_init_kwargs)
243 prefix="Failed to construct dataset {}".format(name)):
--> 244 return builder_cls(name)(**builder_kwargs)
245
c:\python37\lib\site-packages\wrapt\wrappers.py in __call__(self, *args, **kwargs)
602 return self._self_wrapper(self.__wrapped__, self._self_instance,
--> 603 args, kwargs)
604
c:\python37\lib\site-packages\tensorflow_datasets\core\api_utils.py in disallow_positional_args_dec(fn, instance, args, kwargs)
68 _check_required(fn, kwargs)
---> 69 return fn(*args, **kwargs)
70
c:\python37\lib\site-packages\tensorflow_datasets\core\dataset_builder.py in __init__(self, data_dir, config, version)
205 else: # Use the code version (do not restore data)
--> 206 self.info.initialize_from_bucket()
207
c:\python37\lib\site-packages\tensorflow_datasets\core\dataset_info.py in initialize_from_bucket(self)
422 tmp_dir = tempfile.mkdtemp("tfds")
--> 423 data_files = gcs_utils.gcs_dataset_info_files(self.full_name)
424 if not data_files:
c:\python37\lib\site-packages\tensorflow_datasets\core\utils\gcs_utils.py in gcs_dataset_info_files(dataset_dir)
69 """Return paths to GCS files in the given dataset directory."""
---> 70 return gcs_listdir(posixpath.join(GCS_DATASET_INFO_DIR, dataset_dir))
71
c:\python37\lib\site-packages\tensorflow_datasets\core\utils\gcs_utils.py in gcs_listdir(dir_name)
62 root_dir = gcs_path(dir_name)
---> 63 if _is_gcs_disabled or not tf.io.gfile.exists(root_dir):
64 return None
c:\python37\lib\site-packages\tensorflow\python\lib\io\file_io.py in file_exists_v2(path)
266 try:
--> 267 _pywrap_file_io.FileExists(compat.as_bytes(path))
268 except errors.NotFoundError:
UnimplementedError: File system scheme 'gs' not implemented (file: 'gs://tfds-data/dataset_info/imdb_reviews/plain_text/1.0.0')
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
<ipython-input-36-06930b64f980> in <module>
1 #tfds.list_builders()
----> 2 imdb, info = tfds.load('imdb_reviews', with_info=True, as_supervised=True)
c:\python37\lib\site-packages\wrapt\wrappers.py in __call__(self, *args, **kwargs)
562
563 return self._self_wrapper(self.__wrapped__, self._self_instance,
--> 564 args, kwargs)
565
566 class BoundFunctionWrapper(_FunctionWrapperBase):
c:\python37\lib\site-packages\tensorflow_datasets\core\api_utils.py in disallow_positional_args_dec(fn, instance, args, kwargs)
67 _check_no_positional(fn, args, ismethod, allowed=allowed)
68 _check_required(fn, kwargs)
---> 69 return fn(*args, **kwargs)
70
71 return disallow_positional_args_dec(wrapped) # pylint: disable=no-value-for-parameter
c:\python37\lib\site-packages\tensorflow_datasets\core\registered.py in load(name, split, data_dir, batch_size, shuffle_files, download, as_supervised, decoders, read_config, with_info, builder_kwargs, download_and_prepare_kwargs, as_dataset_kwargs, try_gcs)
366 data_dir = constants.DATA_DIR
367
--> 368 dbuilder = builder(name, data_dir=data_dir, **builder_kwargs)
369 if download:
370 download_and_prepare_kwargs = download_and_prepare_kwargs or {}
c:\python37\lib\site-packages\tensorflow_datasets\core\registered.py in builder(name, **builder_init_kwargs)
242 with py_utils.try_reraise(
243 prefix="Failed to construct dataset {}".format(name)):
--> 244 return builder_cls(name)(**builder_kwargs)
245
246
c:\python37\lib\contextlib.py in __exit__(self, type, value, traceback)
128 value = type()
129 try:
--> 130 self.gen.throw(type, value, traceback)
131 except StopIteration as exc:
132 # Suppress StopIteration *unless* it's the same exception that
c:\python37\lib\site-packages\tensorflow_datasets\core\utils\py_utils.py in try_reraise(*args, **kwargs)
399 yield
400 except Exception: # pylint: disable=broad-except
--> 401 reraise(*args, **kwargs)
402
403
c:\python37\lib\site-packages\tensorflow_datasets\core\utils\py_utils.py in reraise(prefix, suffix)
390 suffix = '\n' + suffix if suffix else ''
391 msg = prefix + str(exc_value) + suffix
--> 392 six.reraise(exc_type, exc_type(msg), exc_traceback)
393
394
TypeError: __init__() missing 2 required positional arguments: 'op' and 'message'
Is the library broken? As mentioned, I am on a Windows 10 machine and using JupyterLab.
After I reported the issue on GitHub, the problem was fixed in tensorflow-datasets version 3.2.1.
Related
I am following this article on Medium for this contest.
Everything seems to be fine up to the point where I retrieve the dataset, where I get:
TypeError: '<' not supported between instances of 'L' and 'int'
My code is:
# Build one Pipeline for the inputs and one for the masks (opened with cls=TensorMask).
img_pipe = Pipeline([get_filenames, open_ms_tif])
mask_pipe = Pipeline([label_func, partial(open_tif, cls=TensorMask)])
# NOTE(review): each TransformBlock is handed an already-built Pipeline here.
# Per the resolution further down, passing the plain lists of functions instead
# avoids the TypeError.
db = DataBlock(blocks=(TransformBlock(img_pipe),
TransformBlock(mask_pipe)),
splitter=RandomSplitter(valid_pct=0.2, seed=42)
)
# This is the call at which the traceback below is raised.
ds = db.datasets(source=train_files)
dl = db.dataloaders(source=train_files, bs=4)
train_files is a list of Paths. Here are the first 5:
[Path('nasa_rwanda_field_boundary_competition/nasa_rwanda_field_boundary_competition_source_train/nasa_rwanda_field_boundary_competition_source_train_09_2021_08/B01.tif'),
Path('nasa_rwanda_field_boundary_competition/nasa_rwanda_field_boundary_competition_source_train/nasa_rwanda_field_boundary_competition_source_train_39_2021_04/B01.tif'),
Path('nasa_rwanda_field_boundary_competition/nasa_rwanda_field_boundary_competition_source_train/nasa_rwanda_field_boundary_competition_source_train_12_2021_11/B01.tif'),
Path('nasa_rwanda_field_boundary_competition/nasa_rwanda_field_boundary_competition_source_train/nasa_rwanda_field_boundary_competition_source_train_06_2021_10/B01.tif'),
Path('nasa_rwanda_field_boundary_competition/nasa_rwanda_field_boundary_competition_source_train/nasa_rwanda_field_boundary_competition_source_train_08_2021_08/B01.tif')]
the full stack trace is:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
Input In [66], in <cell line: 10>()
2 mask_pipe = Pipeline([label_func, partial(open_tif, cls=TensorMask)])
4 db = DataBlock(blocks=(TransformBlock(img_pipe),
5 TransformBlock(mask_pipe)),
6 splitter=RandomSplitter(valid_pct=0.2, seed=42)
7 )
---> 10 ds = db.datasets(source=train_files)
11 dl = db.dataloaders(source=train_files, bs=4)
File /usr/local/lib/python3.9/dist-packages/fastai/data/block.py:147, in DataBlock.datasets(self, source, verbose)
145 splits = (self.splitter or RandomSplitter())(items)
146 pv(f"{len(splits)} datasets of sizes {','.join([str(len(s)) for s in splits])}", verbose)
--> 147 return Datasets(items, tfms=self._combine_type_tfms(), splits=splits, dl_type=self.dl_type, n_inp=self.n_inp, verbose=verbose)
File /usr/local/lib/python3.9/dist-packages/fastai/data/core.py:451, in Datasets.__init__(self, items, tfms, tls, n_inp, dl_type, **kwargs)
442 def __init__(self,
443 items:list=None, # List of items to create `Datasets`
444 tfms:list|Pipeline=None, # List of `Transform`(s) or `Pipeline` to apply
(...)
448 **kwargs
449 ):
450 super().__init__(dl_type=dl_type)
--> 451 self.tls = L(tls if tls else [TfmdLists(items, t, **kwargs) for t in L(ifnone(tfms,[None]))])
452 self.n_inp = ifnone(n_inp, max(1, len(self.tls)-1))
File /usr/local/lib/python3.9/dist-packages/fastai/data/core.py:451, in <listcomp>(.0)
442 def __init__(self,
443 items:list=None, # List of items to create `Datasets`
444 tfms:list|Pipeline=None, # List of `Transform`(s) or `Pipeline` to apply
(...)
448 **kwargs
449 ):
450 super().__init__(dl_type=dl_type)
--> 451 self.tls = L(tls if tls else [TfmdLists(items, t, **kwargs) for t in L(ifnone(tfms,[None]))])
452 self.n_inp = ifnone(n_inp, max(1, len(self.tls)-1))
File /usr/local/lib/python3.9/dist-packages/fastcore/foundation.py:98, in _L_Meta.__call__(cls, x, *args, **kwargs)
96 def __call__(cls, x=None, *args, **kwargs):
97 if not args and not kwargs and x is not None and isinstance(x,cls): return x
---> 98 return super().__call__(x, *args, **kwargs)
File /usr/local/lib/python3.9/dist-packages/fastai/data/core.py:361, in TfmdLists.__init__(self, items, tfms, use_list, do_setup, split_idx, train_setup, splits, types, verbose, dl_type)
359 if isinstance(tfms,TfmdLists): tfms = tfms.tfms
360 if isinstance(tfms,Pipeline): do_setup=False
--> 361 self.tfms = Pipeline(tfms, split_idx=split_idx)
362 store_attr('types,split_idx')
363 if do_setup:
File /usr/local/lib/python3.9/dist-packages/fastcore/transform.py:190, in Pipeline.__init__(self, funcs, split_idx)
188 else:
189 if isinstance(funcs, Transform): funcs = [funcs]
--> 190 self.fs = L(ifnone(funcs,[noop])).map(mk_transform).sorted(key='order')
191 for f in self.fs:
192 name = camel2snake(type(f).__name__)
File /usr/local/lib/python3.9/dist-packages/fastcore/foundation.py:136, in L.sorted(self, key, reverse)
--> 136 def sorted(self, key=None, reverse=False): return self._new(sorted_ex(self, key=key, reverse=reverse))
File /usr/local/lib/python3.9/dist-packages/fastcore/basics.py:619, in sorted_ex(iterable, key, reverse)
617 elif isinstance(key,int): k=itemgetter(key)
618 else: k=key
--> 619 return sorted(iterable, key=k, reverse=reverse)
TypeError: '<' not supported between instances of 'L' and 'int'
I'm not sure what is causing the issue. Let me know if you need more of the code.
I expected the data loader to create itself successfully.
I figured it out. It seems the TransformBlocks do not like accepting a Pipeline. I changed the
TransformBlock(img_pipe), TransformBlock(mask_pipe)
to
TransformBlock([get_filenames, open_ms_tif]), TransformBlock([label_func, partial(open_tif, cls=TensorMask)])
which removed the Pipeline wrapper.
I am new to Python and TensorFlow. While executing the tfds.load function, I got the following error. I have spent hours trying to understand the error, but I'm at a loss. Any help would be appreciated.
I am using the following versions: Python 3.8, TensorFlow 2.3, and tensorflow-datasets 1.2.
ValueError Traceback (most recent call last)
<ipython-input-2-41baf13b8c3f> in <module>
----> 1 mnistdataset, mnist_info = tfds.load("mnist",
with_info=True, as_supervised=True)
~\anaconda3\envs\py3-TF2.0\lib\site-packages\tensorflow_datasets\core\api_utils.py in
disallow_positional_args_dec(fn, instance, args, kwargs)
50 _check_no_positional(fn, args, ismethod, allowed=allowed)
51 _check_required(fn, kwargs)
---> 52 return fn(*args, **kwargs)
53
54 return disallow_positional_args_dec(wrapped) # pylint: disable=no-value-for-parameter
~\anaconda3\envs\py3-TF2.0\lib\site-packages\tensorflow_datasets\core\registered.py in load(name,
split, data_dir, batch_size, in_memory, shuffle_files, download, as_supervised, decoders, with_info,
builder_kwargs, download_and_prepare_kwargs, as_dataset_kwargs, try_gcs)
298 if download:
299 download_and_prepare_kwargs = download_and_prepare_kwargs or {}
--> 300 dbuilder.download_and_prepare(**download_and_prepare_kwargs)
301
302 if as_dataset_kwargs is None:
~\anaconda3\envs\py3-TF2.0\lib\site-packages\tensorflow_datasets\core\api_utils.py in
disallow_positional_args_dec(fn, instance, args, kwargs)
50 _check_no_positional(fn, args, ismethod, allowed=allowed)
51 _check_required(fn, kwargs)
---> 52 return fn(*args, **kwargs)
53
54 return disallow_positional_args_dec(wrapped) # pylint: disable=no-value-for-parameter
~\anaconda3\envs\py3-TF2.0\lib\site-packages\tensorflow_datasets\core\dataset_builder.py in
download_and_prepare(self, download_dir, download_config)
260 dl_manager = self._make_download_manager(
261 download_dir=download_dir,
--> 262 download_config=download_config)
263
264 # Currently it's not possible to overwrite the data because it would
~\anaconda3\envs\py3-TF2.0\lib\site-packages\tensorflow_datasets\core\dataset_builder.py in
_make_download_manager(self, download_dir, download_config)
660 force_download=(download_config.download_mode == FORCE_REDOWNLOAD),
661 force_extraction=(download_config.download_mode == FORCE_REDOWNLOAD),
--> 662 register_checksums=download_config.register_checksums,
663 )
664
~\anaconda3\envs\py3-TF2.0\lib\site-packages\tensorflow_datasets\core\api_utils.py in
disallow_positional_args_dec(fn, instance, args, kwargs)
50 _check_no_positional(fn, args, ismethod, allowed=allowed)
51 _check_required(fn, kwargs)
---> 52 return fn(*args, **kwargs)
53
54 return disallow_positional_args_dec(wrapped) # pylint: disable=no-value-for-parameter
~\anaconda3\envs\py3-TF2.0\lib\site-packages\tensorflow_datasets\core\download\download_manager.py in
__init__(self, download_dir, extract_dir, manual_dir, dataset_name, force_download, force_extraction,
register_checksums)
175 self._register_checksums = register_checksums
176 # All known URLs: {url: (size, checksum)}
--> 177 self._sizes_checksums = checksums.get_all_sizes_checksums()
178 # To record what is being used: {url: (size, checksum)}
179 self._recorded_sizes_checksums = {}
~\anaconda3\envs\py3-TF2.0\lib\site-packages\tensorflow_datasets\core\download\checksums.py in
get_all_sizes_checksums()
127 sizes_checksums = {}
128 for path in _checksum_paths().values():
--> 129 data = _get_sizes_checksums(path)
130 for url, size_checksum in data.items():
131 if (url in sizes_checksums and
~\anaconda3\envs\py3-TF2.0\lib\site-packages\tensorflow_datasets\core\download\checksums.py in
_get_sizes_checksums(checksums_path)
117 continue
118 # URL might have spaces inside, but size and checksum will not.
--> 119 url, size, checksum = line.rsplit(' ', 2)
120 checksums[url] = (int(size), checksum)
121 return checksums
ValueError: not enough values to unpack (expected 3, got 1)
From comments
After upgrading tensorflow-datasets from 1.2 to 4.2, the issue was
resolved. (paraphrased from Niteya Shah)
I was also having this issue, and this solved the problem:
pip install tensorflow-datasets==4.3
I had the same problem. My solution was to create a new environment with just:
# Create and activate a fresh conda environment with Python 3.
conda create --name py3-TF2.0 python=3
conda activate py3-TF2.0
# Upgrade pip itself before installing packages.
pip install --upgrade pip
pip install tensorflow
# NOTE(review): likely redundant right after a fresh install; confirm before dropping.
pip install --upgrade tensorflow
pip install tensorflow-datasets
# ipykernel: presumably so the environment can be used as a Jupyter kernel.
pip install ipykernel
I work in a conda environment in Jupyter Notebook.
When trying to create a client using processes as follows
from dask.distributed import Client, progress
# processes=True asks for process-based local workers; this is the call that fails below.
client = Client(processes = True)
the following error occurs
TimeoutError Traceback (most recent call last)
/home/vlad/anaconda3/lib/python3.6/site-packages/tornado/gen.py in
callback(f)
827 try:
--> 828 result_list.append(f.result())
829 except Exception as e:
/home/vlad/anaconda3/lib/python3.6/site-packages/tornado/concurrent.py
in result(self, timeout)
237 try:
--> 238 raise_exc_info(self._exc_info)
239 finally:
/home/vlad/anaconda3/lib/python3.6/site-packages/tornado/util.py in
raise_exc_info(exc_info)
/home/vlad/anaconda3/lib/python3.6/site-packages/tornado/gen.py in
run(self)
1068 else:
-> 1069 yielded = self.gen.send(value)
1070
/home/vlad/anaconda3/lib/python3.6/site-packages/distributed/deploy/local.py
in _start_worker(self, death_timeout, **kwargs)
228 self.workers.remove(w)
--> 229 raise gen.TimeoutError("Worker failed to start")
230
TimeoutError: Worker failed to start
During handling of the above exception, another exception occurred:
TimeoutError Traceback (most recent call last)
<ipython-input-26-9ebe205475b6> in <module>()
3
4 # Use all 8 cores
----> 5 cluster = LocalCluster(processes = True, n_workers = 4)
6 ##client = Client(processes = True)
7 ##client = Client('localhost:8789')
/home/vlad/anaconda3/lib/python3.6/site-packages/distributed/deploy/local.py
in init(self, n_workers, threads_per_worker, processes, loop,
start, ip, scheduler_port, silence_logs, diagnostics_port, services,
worker_services, service_kwargs, asynchronous, security,
**worker_kwargs)
140 self.worker_kwargs['security'] = security
141
--> 142 self.start(ip=ip, n_workers=n_workers)
143
144 clusters_to_close.add(self)
/home/vlad/anaconda3/lib/python3.6/site-packages/distributed/deploy/local.py
in start(self, **kwargs)
177 self._started = self._start(**kwargs)
178 else:
--> 179 self.sync(self._start, **kwargs)
180
181 #gen.coroutine
/home/vlad/anaconda3/lib/python3.6/site-packages/distributed/deploy/local.py
in sync(self, func, *args, **kwargs)
170 return future
171 else:
--> 172 return sync(self.loop, func, *args, **kwargs)
173
174 def start(self, **kwargs):
/home/vlad/anaconda3/lib/python3.6/site-packages/distributed/utils.py
in sync(loop, func, *args, **kwargs)
275 e.wait(10)
276 if error[0]:
--> 277 six.reraise(*error[0])
278 else:
279 return result[0]
/home/vlad/anaconda3/lib/python3.6/site-packages/six.py in reraise(tp, value, tb)
691 if value.__traceback__ is not tb:
692 raise value.with_traceback(tb)
--> 693 raise value
694 finally:
695 value = None
/home/vlad/anaconda3/lib/python3.6/site-packages/distributed/utils.py
in f()
260 if timeout is not None:
261 future = gen.with_timeout(timedelta(seconds=timeout), future)
--> 262 result[0] = yield future
263 except Exception as exc:
264 error[0] = sys.exc_info()
/home/vlad/anaconda3/lib/python3.6/site-packages/tornado/gen.py in
run(self)
1053
1054 try:
-> 1055 value = future.result()
1056 except Exception:
1057 self.had_exception = True
/home/vlad/anaconda3/lib/python3.6/site-packages/tornado/concurrent.py
in result(self, timeout)
236 if self._exc_info is not None:
237 try:
--> 238 raise_exc_info(self._exc_info)
239 finally:
240 self = None
/home/vlad/anaconda3/lib/python3.6/site-packages/tornado/util.py in
raise_exc_info(exc_info)
/home/vlad/anaconda3/lib/python3.6/site-packages/tornado/gen.py in
run(self)
1061 if exc_info is not None:
1062 try:
-> 1063 yielded = self.gen.throw(*exc_info)
1064 finally:
1065 # Break up a reference to itself
/home/vlad/anaconda3/lib/python3.6/site-packages/distributed/deploy/local.py
in _start(self, ip, n_workers)
197 self.scheduler.start(scheduler_address)
198
--> 199 yield [self._start_worker(**self.worker_kwargs) for i in range(n_workers)]
200
201 self.status = 'running'
/home/vlad/anaconda3/lib/python3.6/site-packages/tornado/gen.py in
run(self)
1053
1054 try:
-> 1055 value = future.result()
1056 except Exception:
1057 self.had_exception = True
/home/vlad/anaconda3/lib/python3.6/site-packages/tornado/concurrent.py
in result(self, timeout)
236 if self._exc_info is not None:
237 try:
--> 238 raise_exc_info(self._exc_info)
239 finally:
240 self = None
/home/vlad/anaconda3/lib/python3.6/site-packages/tornado/util.py in
raise_exc_info(exc_info)
/home/vlad/anaconda3/lib/python3.6/site-packages/tornado/gen.py in
callback(f)
826 for f in children:
827 try:
--> 828 result_list.append(f.result())
829 except Exception as e:
830 if future.done():
/home/vlad/anaconda3/lib/python3.6/site-packages/tornado/concurrent.py
in result(self, timeout)
236 if self._exc_info is not None:
237 try:
--> 238 raise_exc_info(self._exc_info)
239 finally:
240 self = None
/home/vlad/anaconda3/lib/python3.6/site-packages/tornado/util.py in
raise_exc_info(exc_info)
/home/vlad/anaconda3/lib/python3.6/site-packages/tornado/gen.py in
run(self)
1067 exc_info = None
1068 else:
-> 1069 yielded = self.gen.send(value)
1070
1071 if stack_context._state.contexts is not orig_stack_contexts:
/home/vlad/anaconda3/lib/python3.6/site-packages/distributed/deploy/local.py
in _start_worker(self, death_timeout, **kwargs)
227 if w.status == 'closed' and self.scheduler.status == 'running':
228 self.workers.remove(w)
--> 229 raise gen.TimeoutError("Worker failed to start")
230
231 raise gen.Return(w)
TimeoutError: Worker failed to start
How can I solve this problem? I have the following setup
python version: 3.6.9
dask version 1.1.4
distributed version 1.26.0
tornado version 4.5
conda 4.6.14
The problem was solved by updating the packages dask, distributed, and tornado to versions 2.4.0, 2.4.0, and 6.0.3, respectively.
I got the error "Object of type 'int64' is not JSON serializable" when I tried to generate a heatmap with folium.
I am running my Jupyter notebook in Anaconda using Python 3.6, and the folium version is 0.9.1.
df_2y_cons_LatLo.dtypes: Latitude float64;
Longitude float64;
Descriptor int64.
def generateBaseMap(default_location=None, default_zoom_start=11):
    """Create the folium base map that layers are added to.

    Args:
        default_location: Map centre, passed to ``folium.Map`` as ``location``.
            When omitted, ``[40.704652, -73.923688]`` is used.
        default_zoom_start: Initial zoom level, passed as ``zoom_start``.

    Returns:
        The newly created ``folium.Map`` (with ``control_scale=True``).
    """
    # A list literal as a default argument is created once and shared by every
    # call; build it per call so one caller can never mutate another's default.
    if default_location is None:
        default_location = [40.704652, -73.923688]
    base_map = folium.Map(location=default_location, control_scale=True, zoom_start=default_zoom_start)
    return base_map
base_map = generateBaseMap()
# NOTE(review): `.values` hands HeatMap NumPy scalars. Descriptor is int64 (see the
# dtypes above), which matches the "int64 is not JSON serializable" error in the
# traceback below. Converting each column with `.tolist()` would yield native
# Python numbers instead; confirm that this resolves the error.
hm = HeatMap(list(zip(df_2y_cons_LatLo.Latitude.values,df_2y_cons_LatLo.Longitude.values,df_2y_cons_LatLo.Descriptor.values)))
base_map.add_child(hm)
I expected a heatmap to be displayed. But after I ran the above code, it gave me the error mentioned above:
TypeError Traceback (most recent call last)
~\Anaconda3\lib\site-packages\IPython\core\formatters.py in __call__(self, obj)
343 method = get_real_method(obj, self.print_method)
344 if method is not None:
--> 345 return method()
346 return None
347 else:
~\Anaconda3\lib\site-packages\folium\folium.py in _repr_html_(self, **kwargs)
291 self._parent = None
292 else:
--> 293 out = self._parent._repr_html_(**kwargs)
294 return out
295
~\Anaconda3\lib\site-packages\branca\element.py in _repr_html_(self, **kwargs)
326
327 """
--> 328 html = self.render(**kwargs)
329 html = "data:text/html;charset=utf-8;base64," + base64.b64encode(html.encode('utf8')).decode('utf8') # noqa
330
~\Anaconda3\lib\site-packages\branca\element.py in render(self, **kwargs)
319 """Renders the HTML representation of the element."""
320 for name, child in self._children.items():
--> 321 child.render(**kwargs)
322 return self._template.render(this=self, kwargs=kwargs)
323
~\Anaconda3\lib\site-packages\folium\folium.py in render(self, **kwargs)
368 '</style>'), name='map_style')
369
--> 370 super(Map, self).render(**kwargs)
371
372 def fit_bounds(self, bounds, padding_top_left=None,
~\Anaconda3\lib\site-packages\branca\element.py in render(self, **kwargs)
631
632 for name, element in self._children.items():
--> 633 element.render(**kwargs)
~\Anaconda3\lib\site-packages\folium\plugins\heat_map.py in render(self, **kwargs)
79
80 def render(self, **kwargs):
---> 81 super(HeatMap, self).render(**kwargs)
82
83 figure = self.get_root()
~\Anaconda3\lib\site-packages\branca\element.py in render(self, **kwargs)
627 script = self._template.module.__dict__.get('script', None)
628 if script is not None:
--> 629 figure.script.add_child(Element(script(self, kwargs)),
630 name=self.get_name())
631
~\Anaconda3\lib\site-packages\jinja2\runtime.py in __call__(self, *args, **kwargs)
573 (self.name, len(self.arguments)))
574
--> 575 return self._invoke(arguments, autoescape)
576
577 def _invoke(self, arguments, autoescape):
~\Anaconda3\lib\site-packages\jinja2\asyncsupport.py in _invoke(self, arguments, autoescape)
108 def _invoke(self, arguments, autoescape):
109 if not self._environment.is_async:
--> 110 return original_invoke(self, arguments, autoescape)
111 return async_invoke(self, arguments, autoescape)
112 return update_wrapper(_invoke, original_invoke)
~\Anaconda3\lib\site-packages\jinja2\runtime.py in _invoke(self, arguments, autoescape)
577 def _invoke(self, arguments, autoescape):
578 """This method is being swapped out by the async implementation."""
--> 579 rv = self._func(*arguments)
580 if autoescape:
581 rv = Markup(rv)
<template> in macro(l_1_this, l_1_kwargs)
~\Anaconda3\lib\site-packages\jinja2\filters.py in do_tojson(eval_ctx, value, indent)
1076 options = dict(options)
1077 options['indent'] = indent
-> 1078 return htmlsafe_json_dumps(value, dumper=dumper, **options)
1079
1080
~\Anaconda3\lib\site-packages\jinja2\utils.py in htmlsafe_json_dumps(obj, dumper, **kwargs)
563 if dumper is None:
564 dumper = json.dumps
--> 565 rv = dumper(obj, **kwargs) \
566 .replace(u'<', u'\\u003c') \
567 .replace(u'>', u'\\u003e') \
~\Anaconda3\lib\json\__init__.py in dumps(obj, skipkeys, ensure_ascii, check_circular, allow_nan, cls, indent, separators, default, sort_keys, **kw)
236 check_circular=check_circular, allow_nan=allow_nan, indent=indent,
237 separators=separators, default=default, sort_keys=sort_keys,
--> 238 **kw).encode(obj)
239
240
~\Anaconda3\lib\json\encoder.py in encode(self, o)
197 # exceptions aren't as detailed. The list call should be roughly
198 # equivalent to the PySequence_Fast that ''.join() would do.
--> 199 chunks = self.iterencode(o, _one_shot=True)
200 if not isinstance(chunks, (list, tuple)):
201 chunks = list(chunks)
~\Anaconda3\lib\json\encoder.py in iterencode(self, o, _one_shot)
255 self.key_separator, self.item_separator, self.sort_keys,
256 self.skipkeys, _one_shot)
--> 257 return _iterencode(o, 0)
258
259 def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
~\Anaconda3\lib\json\encoder.py in default(self, o)
178 """
179 raise TypeError("Object of type '%s' is not JSON serializable" %
--> 180 o.__class__.__name__)
181
182 def encode(self, o):
TypeError: Object of type 'int64' is not JSON serializable
I am loading data from Netezza into a dataframe and then trying to write it to dashDB. I am using ibmdbpy to try to load the data into dashDB on Bluemix. ibmdbpy requires a pandas dataframe, so I convert the Spark dataframe to pandas before loading it into dashDB.
import os

# Read the Netezza table into a Spark DataFrame over JDBC.
all_disputes_df = sqlContext.read.format('jdbc').options(url='jdbc:netezza://pda1-wall.pok.ibm.com:5480/BACC_PRD_ISCNZ_GAPNZ', user=user, password=password, dbtable='METRICS.AR_EM_D2_02_AGG', driver='org.netezza.Driver').load()
from ibmdbpy import IdaDataBase
# Take the dashDB credentials from the environment instead of hard-coding them,
# so they are not exposed whenever this snippet is shared.
idadb = IdaDataBase(dsn='BLUDB', uid=os.environ['DASHDB_UID'], pwd=os.environ['DASHDB_PWD'])
print("current_schema is %s" % idadb.current_schema)
print("tables %s" % idadb.show_tables())
# toPandas() is where the traceback below is raised, before any write to dashDB.
idadb.as_idadataframe(all_disputes_df.toPandas(), "all_disputes")
I am getting the following traceback.
ValueError Traceback (most recent call last)
<ipython-input-4-63dde713c67b> in <module>()
----> 1 idadb.as_idadataframe(all_disputes_df.toPandas(), "all_disputes")
/home/brente/spark/spark-1.6.1-bin-hadoop2.6/python/pyspark/sql/dataframe.pyc in toPandas(self)
1379 """
1380 import pandas as pd
-> 1381 return pd.DataFrame.from_records(self.collect(), columns=self.columns)
1382
1383 ##########################################################################################
/home/brente/spark/spark-1.6.1-bin-hadoop2.6/python/pyspark/sql/dataframe.pyc in collect(self)
279 with SCCallSiteSync(self._sc) as css:
280 port = self._jdf.collectToPython()
--> 281 return list(_load_from_socket(port, BatchedSerializer(PickleSerializer())))
282
283 #ignore_unicode_prefix
/home/brente/spark/spark-1.6.1-bin-hadoop2.6/python/pyspark/rdd.pyc in _load_from_socket(port, serializer)
140 try:
141 rf = sock.makefile("rb", 65536)
--> 142 for item in serializer.load_stream(rf):
143 yield item
144 finally:
/home/brente/spark/spark-1.6.1-bin-hadoop2.6/python/pyspark/serializers.pyc in load_stream(self, stream)
137 while True:
138 try:
--> 139 yield self._read_with_length(stream)
140 except EOFError:
141 return
/home/brente/spark/spark-1.6.1-bin-hadoop2.6/python/pyspark/serializers.pyc in _read_with_length(self, stream)
162 if len(obj) < length:
163 raise EOFError
--> 164 return self.loads(obj)
165
166 def dumps(self, obj):
/home/brente/spark/spark-1.6.1-bin-hadoop2.6/python/pyspark/serializers.pyc in loads(self, obj, encoding)
420 else:
421 def loads(self, obj, encoding=None):
--> 422 return pickle.loads(obj)
423
424
/home/brente/spark/spark-1.6.1-bin-hadoop2.6/python/pyspark/sql/types.pyc in <lambda>(*a)
1157 # This is used to unpickle a Row from JVM
1158 def _create_row_inbound_converter(dataType):
-> 1159 return lambda *a: dataType.fromInternal(a)
1160
1161
/home/brente/spark/spark-1.6.1-bin-hadoop2.6/python/pyspark/sql/types.pyc in fromInternal(self, obj)
563 return obj
564 if self._needSerializeAnyField:
--> 565 values = [f.fromInternal(v) for f, v in zip(self.fields, obj)]
566 else:
567 values = obj
/home/brente/spark/spark-1.6.1-bin-hadoop2.6/python/pyspark/sql/types.pyc in fromInternal(self, obj)
436
437 def fromInternal(self, obj):
--> 438 return self.dataType.fromInternal(obj)
439
440
/home/brente/spark/spark-1.6.1-bin-hadoop2.6/python/pyspark/sql/types.pyc in fromInternal(self, v)
174 def fromInternal(self, v):
175 if v is not None:
--> 176 return datetime.date.fromordinal(v + self.EPOCH_ORDINAL)
177
178
ValueError: ('ordinal must be >= 1', <function <lambda> at 0x7f97c0be76e0>, (u'788', u'10', u'00620000 ', u'0129101548 ', 1, u'000028628 ', 16520, Decimal('2124.76'), Decimal('2124.76'), 16525, 16525, u'000611099
Any ideas on what the problem is?
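Reading your data from Netezza into dataframes is the step that fails. The traceback ends in datetime.date.fromordinal(v + self.EPOCH_ORDINAL) raising "ordinal must be >= 1", which points at a date column holding a value that cannot be converted to a Python date. Everything beyond that is speculation on my side:
Could there be invalid data stored in Netezza that throws off the deserialization into dataframes?
Maybe try some other queries to make sure that there is no connectivity problem, no typo in the database name, or anything like that.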