Python 3; MatplotLib ; Box Plot Error - python-3.x

I am new to python/ pandas and trying to create a boxplot using the iris data set.
Here is my code:
import pandas as pd
iris_filename = '/Users/pro/Documents/Code/Data Science/Iris/IRIS.csv'
iris = pd.read_csv(iris_filename, header = None,
names= ['sepal_lenght','sepal_width','petal_lenght','petal_width','target'])
plt.boxplot(iris)
I get this error:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-20-e190e88674b0> in <module>()
----> 1 plt.boxplot(iris)
/anaconda/lib/python3.5/site-packages/matplotlib/pyplot.py in boxplot(x, notch, sym, vert, whis, positions, widths, patch_artist, bootstrap, usermedians, conf_intervals, meanline, showmeans, showcaps, showbox, showfliers, boxprops, labels, flierprops, medianprops, meanprops, capprops, whiskerprops, manage_xticks, autorange, zorder, hold, data)
2784 whiskerprops=whiskerprops,
2785 manage_xticks=manage_xticks, autorange=autorange,
-> 2786 zorder=zorder, data=data)
2787 finally:
2788 ax._hold = washold
/anaconda/lib/python3.5/site-packages/matplotlib/__init__.py in inner(ax, *args, **kwargs)
1890 warnings.warn(msg % (label_namer, func.__name__),
1891 RuntimeWarning, stacklevel=2)
-> 1892 return func(ax, *args, **kwargs)
1893 pre_doc = inner.__doc__
1894 if pre_doc is None:
/anaconda/lib/python3.5/site-packages/matplotlib/axes/_axes.py in boxplot(self, x, notch, sym, vert, whis, positions, widths, patch_artist, bootstrap, usermedians, conf_intervals, meanline, showmeans, showcaps, showbox, showfliers, boxprops, labels, flierprops, medianprops, meanprops, capprops, whiskerprops, manage_xticks, autorange, zorder)
3266 bootstrap = rcParams['boxplot.bootstrap']
3267 bxpstats = cbook.boxplot_stats(x, whis=whis, bootstrap=bootstrap,
-> 3268 labels=labels, autorange=autorange)
3269 if notch is None:
3270 notch = rcParams['boxplot.notch']
/anaconda/lib/python3.5/site-packages/matplotlib/cbook.py in boxplot_stats(X, whis, bootstrap, labels, autorange)
1984
1985 # convert X to a list of lists
-> 1986 X = _reshape_2D(X)
1987
1988 ncols = len(X)
/anaconda/lib/python3.5/site-packages/matplotlib/cbook.py in _reshape_2D(X)
2245 X = [X.ravel()]
2246 else:
-> 2247 X = [X[:, i] for i in xrange(ncols)]
2248 else:
2249 raise ValueError("input `X` must have 2 or fewer dimensions")
/anaconda/lib/python3.5/site-packages/matplotlib/cbook.py in <listcomp>(.0)
2245 X = [X.ravel()]
2246 else:
-> 2247 X = [X[:, i] for i in xrange(ncols)]
2248 else:
2249 raise ValueError("input `X` must have 2 or fewer dimensions")
/anaconda/lib/python3.5/site-packages/pandas/core/frame.py in __getitem__(self, key)
2057 return self._getitem_multilevel(key)
2058 else:
-> 2059 return self._getitem_column(key)
2060
2061 def _getitem_column(self, key):
/anaconda/lib/python3.5/site-packages/pandas/core/frame.py in _getitem_column(self, key)
2064 # get column
2065 if self.columns.is_unique:
-> 2066 return self._get_item_cache(key)
2067
2068 # duplicate columns & possible reduce dimensionality
/anaconda/lib/python3.5/site-packages/pandas/core/generic.py in _get_item_cache(self, item)
1382 """Return the cached item, item represents a label indexer."""
1383 cache = self._item_cache
-> 1384 res = cache.get(item)
1385 if res is None:
1386 values = self._data.get(item)
TypeError: unhashable type: 'slice'
I have searched the web for this and cannot seem to find an answer to this issue. I would appreciate any help.

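You are passing the DataFrame iris directly to matplotlib's boxplot, which tries to slice it positionally (X[:, i], as shown in the traceback); a DataFrame does not support that kind of indexing, hence the TypeError. As you are already using pandas to import the .csv, you can also use it for plotting: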
iris.boxplot()
Pandas boxplot api

Related

jax.lax.fori_loop Abstract tracer value encountered where concrete value is expected

I've a JAX loop that looks like this where inside the step function I use min between the two arguments
import jax
def step(timestep: int, order: int = 4) -> int:
order = min(timestep + 1, order)
return order
num_steps = 10
order = 100
order = jax.lax.fori_loop(0, num_steps, step, order)
The above code fails with a jax._src.errors.ConcretizationTypeError. This is the full stack trace:
WARNING:jax._src.lib.xla_bridge:No GPU/TPU found, falling back to CPU. (Set TF_CPP_MIN_LOG_LEVEL=0 and rerun for more info.)
---------------------------------------------------------------------------
UnfilteredStackTrace Traceback (most recent call last)
<ipython-input-4-9ec280f437cb> in <module>
2 order = 100
----> 3 order = jax.lax.fori_loop(0, num_steps, step, order)
16 frames
/usr/local/lib/python3.8/dist-packages/jax/_src/traceback_util.py in reraise_with_filtered_traceback(*args, **kwargs)
161 try:
--> 162 return fun(*args, **kwargs)
163 except Exception as e:
/usr/local/lib/python3.8/dist-packages/jax/_src/lax/control_flow/loops.py in fori_loop(lower, upper, body_fun, init_val)
1691
-> 1692 (_, result), _ = scan(_fori_scan_body_fun(body_fun), (lower_, init_val),
1693 None, length=upper_ - lower_)
/usr/local/lib/python3.8/dist-packages/jax/_src/traceback_util.py in reraise_with_filtered_traceback(*args, **kwargs)
161 try:
--> 162 return fun(*args, **kwargs)
163 except Exception as e:
/usr/local/lib/python3.8/dist-packages/jax/_src/lax/control_flow/loops.py in scan(f, init, xs, length, reverse, unroll)
258 # necessary, a second time with modified init values.
--> 259 init_flat, carry_avals, carry_avals_out, init_tree, *rest = _create_jaxpr(init)
260 new_init_flat, changed = _promote_weak_typed_inputs(init_flat, carry_avals, carry_avals_out)
/usr/local/lib/python3.8/dist-packages/jax/_src/lax/control_flow/loops.py in _create_jaxpr(init)
244 carry_avals = tuple(_map(_abstractify, init_flat))
--> 245 jaxpr, consts, out_tree = _initial_style_jaxpr(
246 f, in_tree, (*carry_avals, *x_avals), "scan")
/usr/local/lib/python3.8/dist-packages/jax/_src/lax/control_flow/common.py in _initial_style_jaxpr(fun, in_tree, in_avals, primitive_name)
59 primitive_name: Optional[str] = None):
---> 60 jaxpr, consts, out_tree = _initial_style_open_jaxpr(
61 fun, in_tree, in_avals, primitive_name)
/usr/local/lib/python3.8/dist-packages/jax/_src/lax/control_flow/common.py in _initial_style_open_jaxpr(fun, in_tree, in_avals, primitive_name)
53 debug = pe.debug_info(fun, in_tree, False, primitive_name or "<unknown>")
---> 54 jaxpr, _, consts = pe.trace_to_jaxpr_dynamic(wrapped_fun, in_avals, debug)
55 return jaxpr, consts, out_tree()
/usr/local/lib/python3.8/dist-packages/jax/_src/profiler.py in wrapper(*args, **kwargs)
313 with TraceAnnotation(name, **decorator_kwargs):
--> 314 return func(*args, **kwargs)
315 return wrapper
/usr/local/lib/python3.8/dist-packages/jax/interpreters/partial_eval.py in trace_to_jaxpr_dynamic(fun, in_avals, debug_info, keep_inputs)
1980 main.jaxpr_stack = () # type: ignore
-> 1981 jaxpr, out_avals, consts = trace_to_subjaxpr_dynamic(
1982 fun, main, in_avals, keep_inputs=keep_inputs, debug_info=debug_info)
/usr/local/lib/python3.8/dist-packages/jax/interpreters/partial_eval.py in trace_to_subjaxpr_dynamic(fun, main, in_avals, keep_inputs, debug_info)
1997 in_tracers_ = [t for t, keep in zip(in_tracers, keep_inputs) if keep]
-> 1998 ans = fun.call_wrapped(*in_tracers_)
1999 out_tracers = map(trace.full_raise, ans)
/usr/local/lib/python3.8/dist-packages/jax/linear_util.py in call_wrapped(self, *args, **kwargs)
166 try:
--> 167 ans = self.f(*args, **dict(self.params, **kwargs))
168 except:
/usr/local/lib/python3.8/dist-packages/jax/_src/lax/control_flow/loops.py in scanned_fun(loop_carry, _)
1607 i, x = loop_carry
-> 1608 return (i + 1, body_fun()(i, x)), None
1609 return scanned_fun
<ipython-input-2-2e3345899235> in step(timestep, order)
1 def step(timestep: int, order: int = 100) -> int:
----> 2 order = min(timestep + 1, order)
3 return order
/usr/local/lib/python3.8/dist-packages/jax/core.py in __bool__(self)
633 def __nonzero__(self): return self.aval._nonzero(self)
--> 634 def __bool__(self): return self.aval._bool(self)
635 def __int__(self): return self.aval._int(self)
/usr/local/lib/python3.8/dist-packages/jax/core.py in error(self, arg)
1266 def error(self, arg):
-> 1267 raise ConcretizationTypeError(arg, fname_context)
1268 return error
UnfilteredStackTrace: jax._src.errors.ConcretizationTypeError: Abstract tracer value encountered where concrete value is expected: Traced<ShapedArray(bool[], weak_type=True)>with<DynamicJaxprTrace(level=1/0)>
The problem arose with the `bool` function.
The error occurred while tracing the function scanned_fun at /usr/local/lib/python3.8/dist-packages/jax/_src/lax/control_flow/loops.py:1606 for scan. This concrete value was not available in Python because it depends on the values of the argument 'loop_carry'.
See https://jax.readthedocs.io/en/latest/errors.html#jax.errors.ConcretizationTypeError
The stack trace below excludes JAX-internal frames.
The preceding is the original exception that occurred, unmodified.
--------------------
The above exception was the direct cause of the following exception:
ConcretizationTypeError Traceback (most recent call last)
<ipython-input-4-9ec280f437cb> in <module>
1 num_steps = 10
2 order = 100
----> 3 order = jax.lax.fori_loop(0, num_steps, step, order)
<ipython-input-2-2e3345899235> in step(timestep, order)
1 def step(timestep: int, order: int = 100) -> int:
----> 2 order = min(timestep + 1, order)
3 return order
ConcretizationTypeError: Abstract tracer value encountered where concrete value is expected: Traced<ShapedArray(bool[], weak_type=True)>with<DynamicJaxprTrace(level=1/0)>
The problem arose with the `bool` function.
The error occurred while tracing the function scanned_fun at /usr/local/lib/python3.8/dist-packages/jax/_src/lax/control_flow/loops.py:1606 for scan. This concrete value was not available in Python because it depends on the values of the argument 'loop_carry'.
See https://jax.readthedocs.io/en/latest/errors.html#jax.errors.ConcretizationTypeError
Everything works fine if, instead of using jax.lax.fori_loop, I use a simple Python loop, but then my original code ends up very slow. How can I fix this issue?
Use jax.numpy.minimum in place of min:
def step(timestep: int, order: int = 4) -> int:
    """Loop body for ``jax.lax.fori_loop``: cap ``order`` at ``timestep + 1``.

    ``jax.numpy.minimum`` is used instead of the builtin ``min`` because,
    when this function is traced by ``fori_loop``, the arguments are abstract
    tracers; ``min`` would call ``bool()`` on their comparison and raise a
    ``ConcretizationTypeError``.

    Args:
        timestep: Current loop index (a tracer when run under ``fori_loop``).
        order: Value carried over from the previous iteration.

    Returns:
        The minimum of ``timestep + 1`` and ``order`` (a JAX scalar rather
        than a plain Python ``int``).
    """
    return jax.numpy.minimum(timestep + 1, order)
The reason min does not work is that in the course of executing code within jit, grad, vmap, fori_loop, etc., JAX replaces concrete values with abstract tracers, and Python functions like min don't know how to handle these abstract values. See How to Think in JAX for more background on this.

Color heatmap in different colors by column in dataframe

I have a dataframe which i'm trying to plot in a heatmap.
df = pd.DataFrame(np.random.randint(0,2,size=(10, 5)),
columns=['1', '2','3','4','5'])
cluster = [1,1,2,2,3,3,4,4,5,5]
df['cluster'] = cluster
x_axis_labels = []
for i in range(1, 6):
x_axis_labels.append(i)
fig, ax = plt.subplots(figsize=(4, 10))
from scipy.ndimage import gaussian_filter
np_smooth = gaussian_filter(df, sigma=0.75)
sns.heatmap(np_smooth, cmap="YlGnBu",
xticklabels=x_axis_labels,
yticklabels=False, cbar=False)
plt.show()
And this is the output:
1
Each row of the heatmap represents a row in the df. I would like to color each cluster in a different color.
like in this photo:
2
I've added the following code but it gives me an error. Would be happy for some help!
The additional code:
cmaps = {'1': 'Blues_r', '2': 'Greens_r', '3': 'Blues_r', '4': 'Greens_r', '5': 'Blues_r', '6': 'Greens_r', '7': 'Blues_r','0': 'Greens_r'}
for clus, cmap in cmaps.items():
mask = df.apply(lambda x: x if x['cluster'] == int(clus) else 0, result_type='broadcast',
axis=1).eq(0)
sns.heatmap(np_smooth, mask=mask, cmap=cmap, xticklabels=x_axis_labels, yticklabels=False,
cbar=False, ax=ax)
plt.show()
The error:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
~\Anaconda3\lib\site-packages\numexpr\necompiler.py in evaluate(ex, local_dict, global_dict, out, order, casting, **kwargs)
826 try:
--> 827 compiled_ex = _numexpr_cache[numexpr_key]
828 except KeyError:
KeyError: ('a_value | b_value', (('optimization', 'aggressive'), ('truediv', True)), (('a_value', <class 'numpy.float64'>), ('b_value', <class 'bool'>)))
During handling of the above exception, another exception occurred:
NotImplementedError Traceback (most recent call last)
<ipython-input-191-2f3e7e4bc9aa> in cluster_heatmap(df, plot_name, base_num)
24 ).eq(0)
25 # plot masked heatmap on reusable ax
---> 26 sns.heatmap(np_smooth, mask=mask, cmap=cmap, ax=ax, xticklabels=x_axis_labels, yticklabels=False, cbar=False)
27
28
~\Anaconda3\lib\site-packages\seaborn\_decorators.py in inner_f(*args, **kwargs)
44 )
45 kwargs.update({k: arg for k, arg in zip(sig.parameters, args)})
---> 46 return f(**kwargs)
47 return inner_f
48
~\Anaconda3\lib\site-packages\seaborn\matrix.py in heatmap(data, vmin, vmax, cmap, center, robust, annot, fmt, annot_kws, linewidths, linecolor, cbar, cbar_kws, cbar_ax, square, xticklabels, yticklabels, mask, ax, **kwargs)
540 plotter = _HeatMapper(data, vmin, vmax, cmap, center, robust, annot, fmt,
541 annot_kws, cbar, cbar_kws, xticklabels,
--> 542 yticklabels, mask)
543
544 # Add the pcolormesh kwargs here
~\Anaconda3\lib\site-packages\seaborn\matrix.py in __init__(self, data, vmin, vmax, cmap, center, robust, annot, fmt, annot_kws, cbar, cbar_kws, xticklabels, yticklabels, mask)
107
108 # Validate the mask and convet to DataFrame
--> 109 mask = _matrix_mask(data, mask)
110
111 plot_data = np.ma.masked_where(np.asarray(mask), plot_data)
~\Anaconda3\lib\site-packages\seaborn\matrix.py in _matrix_mask(data, mask)
86 # This works around an issue where `plt.pcolormesh` doesn't represent
87 # missing data properly
---> 88 mask = mask | pd.isnull(data)
89
90 return mask
~\Anaconda3\lib\site-packages\pandas\core\ops.py in f(self, other, axis, level, fill_value)
2021 # Another DataFrame
2022 pass_op = op if should_series_dispatch(self, other, op) else na_op
-> 2023 return self._combine_frame(other, pass_op, fill_value, level)
2024 elif isinstance(other, ABCSeries):
2025 # For these values of `axis`, we end up dispatching to Series op,
~\Anaconda3\lib\site-packages\pandas\core\frame.py in _combine_frame(self, other, func, fill_value, level)
5086 if ops.should_series_dispatch(this, other, func):
5087 # iterate over columns
-> 5088 return ops.dispatch_to_series(this, other, _arith_op)
5089 else:
5090 result = _arith_op(this.values, other.values)
~\Anaconda3\lib\site-packages\pandas\core\ops.py in dispatch_to_series(left, right, func, str_rep, axis)
1155 raise NotImplementedError(right)
1156
-> 1157 new_data = expressions.evaluate(column_op, str_rep, left, right)
1158
1159 result = left._constructor(new_data, index=left.index, copy=False)
~\Anaconda3\lib\site-packages\pandas\core\computation\expressions.py in evaluate(op, op_str, a, b, use_numexpr, **eval_kwargs)
206 use_numexpr = use_numexpr and _bool_arith_check(op_str, a, b)
207 if use_numexpr:
--> 208 return _evaluate(op, op_str, a, b, **eval_kwargs)
209 return _evaluate_standard(op, op_str, a, b)
210
~\Anaconda3\lib\site-packages\pandas\core\computation\expressions.py in _evaluate_numexpr(op, op_str, a, b, truediv, reversed, **eval_kwargs)
121
122 if result is None:
--> 123 result = _evaluate_standard(op, op_str, a, b)
124
125 return result
~\Anaconda3\lib\site-packages\pandas\core\computation\expressions.py in _evaluate_standard(op, op_str, a, b, **eval_kwargs)
66 _store_test_result(False)
67 with np.errstate(all='ignore'):
---> 68 return op(a, b)
69
70
~\Anaconda3\lib\site-packages\pandas\core\ops.py in column_op(a, b)
1133 def column_op(a, b):
1134 return {i: func(a.iloc[:, i], b.iloc[:, i])
-> 1135 for i in range(len(a.columns))}
1136
1137 elif isinstance(right, ABCSeries) and axis == "columns":
~\Anaconda3\lib\site-packages\pandas\core\ops.py in <dictcomp>(.0)
1133 def column_op(a, b):
1134 return {i: func(a.iloc[:, i], b.iloc[:, i])
-> 1135 for i in range(len(a.columns))}
1136
1137 elif isinstance(right, ABCSeries) and axis == "columns":
~\Anaconda3\lib\site-packages\pandas\core\frame.py in _arith_op(left, right)
5082 # left._binop(right, func, fill_value=fill_value)
5083 left, right = ops.fill_binop(left, right, fill_value)
-> 5084 return func(left, right)
5085
5086 if ops.should_series_dispatch(this, other, func):
~\Anaconda3\lib\site-packages\pandas\core\ops.py in na_op(x, y)
1999
2000 try:
-> 2001 result = expressions.evaluate(op, str_rep, x, y, **eval_kwargs)
2002 except TypeError:
2003 result = masked_arith_op(x, y, op)
~\Anaconda3\lib\site-packages\pandas\core\computation\expressions.py in evaluate(op, op_str, a, b, use_numexpr, **eval_kwargs)
206 use_numexpr = use_numexpr and _bool_arith_check(op_str, a, b)
207 if use_numexpr:
--> 208 return _evaluate(op, op_str, a, b, **eval_kwargs)
209 return _evaluate_standard(op, op_str, a, b)
210
~\Anaconda3\lib\site-packages\pandas\core\computation\expressions.py in _evaluate_numexpr(op, op_str, a, b, truediv, reversed, **eval_kwargs)
112 'b_value': b_value},
113 casting='safe', truediv=truediv,
--> 114 **eval_kwargs)
115 except ValueError as detail:
116 if 'unknown type object' in str(detail):
~\Anaconda3\lib\site-packages\numexpr\necompiler.py in evaluate(ex, local_dict, global_dict, out, order, casting, **kwargs)
827 compiled_ex = _numexpr_cache[numexpr_key]
828 except KeyError:
--> 829 compiled_ex = _numexpr_cache[numexpr_key] = NumExpr(ex, signature, **context)
830 kwargs = {'out': out, 'order': order, 'casting': casting,
831 'ex_uses_vml': ex_uses_vml}
~\Anaconda3\lib\site-packages\numexpr\necompiler.py in NumExpr(ex, signature, **kwargs)
624
625 context = getContext(kwargs, frame_depth=1)
--> 626 threeAddrProgram, inputsig, tempsig, constants, input_names = precompile(ex, signature, context)
627 program = compileThreeAddrForm(threeAddrProgram)
628 return interpreter.NumExpr(inputsig.encode('ascii'),
~\Anaconda3\lib\site-packages\numexpr\necompiler.py in precompile(ex, signature, context)
569 ast = ASTNode('op', value='copy', astKind=ex.astKind, children=(ast,))
570
--> 571 ast = typeCompileAst(ast)
572
573 aliases = collapseDuplicateSubtrees(ast)
~\Anaconda3\lib\site-packages\numexpr\necompiler.py in typeCompileAst(ast)
212 raise NotImplementedError(
213 "couldn't find matching opcode for '%s'"
--> 214 % (ast.value + '_' + retsig + basesig))
215 # First just cast constants, then cast variables if necessary:
216 for i, (have, want) in enumerate(zip(basesig, sig)):
NotImplementedError: couldn't find matching opcode for 'or_bdb'
Using a DataFrame as the mask seems to cause the error. You could pass mask.values instead, so that only the underlying array is used.
The example code below makes the following changes:
the columns of the dataframe are changed from integer to float
the clusters column is left out for the gaussian_filter and for the mask
the mask is calculated from the cluster column alone, and that per-row value is then repeated across all the data columns
the dictionary for the colormaps now uses numbers as keys
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
# Random 0/1 sample data, cast to float before smoothing.
df = pd.DataFrame(np.random.randint(0, 2, size=(10, 5)).astype(float),
                  columns=['1', '2', '3', '4', '5'])
cluster = [1, 1, 2, 2, 3, 3, 4, 4, 5, 5]
df['cluster'] = cluster
x_axis_labels = range(1, 6)
fig, ax = plt.subplots(figsize=(4, 10))
from scipy.ndimage import gaussian_filter
# Smooth only the data columns; the trailing 'cluster' column is left out.
np_smooth = gaussian_filter(df[df.columns[:-1]], sigma=0.75)
# One colormap per cluster id (numeric keys, matching the 'cluster' column).
cmaps = {1: 'Reds_r', 2: 'Greys_r', 3: 'Blues_r', 4: 'Greens_r', 5: 'Purples_r', 6: 'Greens_r', 7: 'Blues_r',
         0: 'Greens_r'}
for clus, cmap in cmaps.items():
    # True for rows that do NOT belong to this cluster, repeated across every
    # data column so the mask has the same shape as np_smooth.
    mask = np.repeat((df['cluster'] != int(clus)).values.reshape(-1, 1), len(df.columns) - 1, 1)
    # Skip cluster ids that have no rows (everything would be masked).
    if not mask.all():
        # All layers are drawn on the same ax, each in its own colormap.
        sns.heatmap(np_smooth, mask=mask, cmap=cmap, xticklabels=x_axis_labels, yticklabels=False,
                    cbar=False, ax=ax)
plt.tight_layout()
plt.show()
PS: To have consistent color ranges, you might use vmin=np_smooth.min(), vmax=np_smooth.max() in the calls to sns.heatmap().

Saving Torch Hub's SSD inferenced images in an output directory

I'm using Pytorch SSD that loads a pre-trained model on the COCO dataset from Torch HUB. Modified the code in an API format to fetch some images and detect the objects in it.
Trying to save each inference image output in the /output using matplotlib's .savefig() method but getting an error:
import torch
import matplotlib.patches as patches
from matplotlib import pyplot as plt
class ObjectDetector:
def __init__(self):
self.precision = 'fp32'
self.detect_model = torch.hub.load('NVIDIA/DeepLearningExamples:torchhub', 'nvidia_ssd', model_math=self.precision)
self.utils = torch.hub.load('NVIDIA/DeepLearningExamples:torchhub', 'nvidia_ssd_processing_utils')
def process(self):
self.fetch_images()
self.create_model()
self.display_detections()
def fetch_images(self):
"""To get the images from a website"""
self.images = ['http://images.cocodataset.org/val2017/000000397133.jpg','http://images.cocodataset.org/val2017/000000037777.jpg','http://images.cocodataset.org/val2017/000000252219.jpg']
return self.images
def create_model(self):
self.detect_model.to('cuda')
self.detect_model.eval()
self.inputs = [self.utils.prepare_input(uri) for uri in self.images]
tensor = self.utils.prepare_tensor(self.inputs, self.precision == 'fp16')
with torch.no_grad():
detections_batch = self.detect_model(tensor)
results_per_input = self.utils.decode_results(detections_batch)
self.best_results_per_input = [self.utils.pick_best(results, 0.40) for results in results_per_input]
self.classes_to_labels = self.utils.get_coco_object_dictionary()
return self.best_results_per_input, self.classes_to_labels
def display_detections(self):
output_dir = "../data/vision/ssd/output"
for image_idx in range(len(self.best_results_per_input)):
fig, ax = plt.subplots(figsize=(20, 10))
# Show original, denormalized image...
image = self.inputs[image_idx] / 2 + 0.5
ax.imshow(image)
# ...with detections
bboxes, classes, confidences = self.best_results_per_input[image_idx]
for idx in range(len(bboxes)):
left, bot, right, top = bboxes[idx]
x, y, w, h = [val * 300 for val in [left, bot, right - left, top - bot]]
rect = patches.Rectangle((x, y), w, h, linewidth=1, edgecolor='r', facecolor='none')
ax.add_patch(rect)
ax.text(x, y, "{} {:.0f}%".format(self.classes_to_labels[classes[idx] - 1], confidences[idx] * 100), bbox=dict(facecolor='white', alpha=0.5))
plt.savefig(output_dir + str(image) + '.jpg')
plt.show()
if __name__== '__main__':
det = ObjectDetector()
det.process()
del det
The above code throws the following error:
---------------------------------------------------------------------------
FileNotFoundError Traceback (most recent call last)
<ipython-input-9-acbe775772c1> in <module>
63 if __name__== '__main__':
64 det = ObjectDetector()
---> 65 det.process()
66
67 del det
<ipython-input-9-acbe775772c1> in process(self)
14 self.fetch_images()
15 self.create_model()
---> 16 self.display_detections()
17
18
<ipython-input-9-acbe775772c1> in display_detections(self)
57 ax.text(x, y, "{} {:.0f}%".format(self.classes_to_labels[classes[idx] - 1], confidences[idx] * 100), bbox=dict(facecolor='white', alpha=0.5))
58
---> 59 plt.savefig(output_dir + str(image) + '.jpg')
60 plt.show()
61
~/venv38/lib/python3.8/site-packages/matplotlib/pyplot.py in savefig(*args, **kwargs)
721 def savefig(*args, **kwargs):
722 fig = gcf()
--> 723 res = fig.savefig(*args, **kwargs)
724 fig.canvas.draw_idle() # need this if 'transparent=True' to reset colors
725 return res
~/venv38/lib/python3.8/site-packages/matplotlib/figure.py in savefig(self, fname, transparent, **kwargs)
2201 self.patch.set_visible(frameon)
2202
-> 2203 self.canvas.print_figure(fname, **kwargs)
2204
2205 if frameon:
~/venv38/lib/python3.8/site-packages/matplotlib/backend_bases.py in print_figure(self, filename, dpi, facecolor, edgecolor, orientation, format, bbox_inches, **kwargs)
2117
2118 try:
-> 2119 result = print_method(
2120 filename,
2121 dpi=dpi,
~/venv38/lib/python3.8/site-packages/matplotlib/cbook/deprecation.py in wrapper(*args, **kwargs)
356 f"%(removal)s. If any parameter follows {name!r}, they "
357 f"should be pass as keyword, not positionally.")
--> 358 return func(*args, **kwargs)
359
360 return wrapper
~/venv38/lib/python3.8/site-packages/matplotlib/backends/backend_agg.py in print_jpg(self, filename_or_obj, dryrun, pil_kwargs, *args, **kwargs)
597 pil_kwargs.setdefault("quality", rcParams["savefig.jpeg_quality"])
598 pil_kwargs.setdefault("dpi", (self.figure.dpi, self.figure.dpi))
--> 599 return background.save(
600 filename_or_obj, format='jpeg', **pil_kwargs)
601
~/venv38/lib/python3.8/site-packages/PIL/Image.py in save(self, fp, format, **params)
2153 fp = builtins.open(filename, "r+b")
2154 else:
-> 2155 fp = builtins.open(filename, "w+b")
2156
2157 try:
FileNotFoundError: [Errno 2] No such file or directory: '../data/vision/ssd/output[[[0.1050852 0.07895297 0.08367175]\n [0.31462591 0.31466424 0.32513717]\n [0.28277484 0.25506944 0.23508735]\n ...\n [0.42182888 0.27386384 0.07784647]\n [0.67421166 0.57844825 0.39889071]\n [0.554919 0.33316082 0.09618731]]\n\n [[0.05228582 0.03646781 0.0400054 ]\n [0.06949542 0.06235639 0.05692344]\n [0.25959795 0.18080175 0.18654409]\n ...\n [0.60428691 0.30419598 0.06168084]\n [0.62523846 0.37480789 0.15464491]\n [0.40595506 0.21335363 0.0789785 ]]\n\n [[0.10904118 0.11286539 0.09207947]\n [0.0804173 0.04945466 0.03713621]\n [0.24569849 0.12457102 0.1002835 ]\n ...\n [0.8473525 0.49805938 0.01584464]\n [0.62128949 0.34659926 0.04259144]\n [0.60784509 0.39757653 0.1146472 ]]\n\n ...\n\n [[0.54990582 0.37598903 0.20369267]\n [0.5526588 0.38010985 0.19625383]\n [0.56226779 0.38371096 0.20185737]\n ...\n [0.29863339 0.2165191 0.14226269]\n [0.30894688 0.23059896 0.16393229]\n [0.31879315 0.21973148 0.16671452]]\n\n [[0.54124921 0.37518263 0.19985079]\n [0.54947818 0.38385507 0.19607851]\n [0.54889008 0.37478852 0.18892228]\n ...\n [0.29478525 0.22002212 0.15326277]\n [0.31478406 0.23243116 0.16062237]\n [0.30818757 0.21890863 0.14786195]]\n\n [[0.53892612 0.37097071 0.1888549 ]\n [0.54983966 0.38421659 0.19571689]\n [0.55770917 0.38090676 0.18950984]\n ...\n [0.316164 0.24439232 0.16849774]\n [0.32127783 0.23892493 0.16441515]\n [0.30470566 0.21542674 0.14437993]]].jpg'
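The error occurs because str(image) puts the whole pixel array into the file name, as the FileNotFoundError message shows. Build the file name from something short and unique instead; the figure can be saved as shown below. To generate a random unique string for the name, the uuid module (import uuid) can be used.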
def display_detections(self):
    """Draw the detections on every input image and save each figure.

    Reads ``self.inputs``, ``self.best_results_per_input`` and
    ``self.classes_to_labels``. One figure per image is written to the
    output directory under a random, unique file name, then shown.
    """
    # Local imports: neither module is imported by the surrounding snippet.
    import os
    import uuid

    output_dir = "../data/vision/ssd/output"
    # savefig does not create missing directories, so make sure it exists.
    os.makedirs(output_dir, exist_ok=True)

    for image_idx, (bboxes, classes, confidences) in enumerate(self.best_results_per_input):
        fig, ax = plt.subplots(figsize=(20, 10))
        # Show original, denormalized image...
        image = self.inputs[image_idx] / 2 + 0.5
        ax.imshow(image)
        # ...with detections
        for idx, (left, bot, right, top) in enumerate(bboxes):
            # Scale box coordinates to pixels
            # (presumably 300 is the model's input size -- TODO confirm).
            x, y, w, h = [val * 300 for val in [left, bot, right - left, top - bot]]
            rect = patches.Rectangle((x, y), w, h, linewidth=1, edgecolor='r', facecolor='none')
            ax.add_patch(rect)
            ax.text(x, y, "{} {:.0f}%".format(self.classes_to_labels[classes[idx] - 1], confidences[idx] * 100), bbox=dict(facecolor='white', alpha=0.5))
        plt.axis('off')
        # The name has no extension, so matplotlib appends its default
        # savefig format. The name must never be built from the image array.
        plt.savefig(os.path.join(output_dir, "Image" + "_" + str(uuid.uuid4())))
        plt.show()
        # Release the figure so they do not pile up across iterations.
        plt.close(fig)

compute distance for data frame columns from python list

I have a dataframe
data=sqlContext.createDataFrame([[33.603699,-83.967819[43.609422,-84.188726],[40.751800537,-74.066200256]],['a','b'])
and I have a list of lat/lon pairs. For each lat/lon pair in the data I want to compute the distance between each lat/lon
pair in the list. I am using code from this answer as my distance function
How to sum distances between data points in a dataset using (Py)Spark?
lat_lon_list=[[26.145677, -80.120355],[26.179337, -80.25151600000001],[26.188919, -98.21469499999999], [26.641769, -81.875031]]
def dist_2(long_x, lat_x, long_y, lat_y):
z0=np.sin(np.radians(lat_y))
z1=np.cos(np.radians(lat_y))
z3=np.radians(long_y)
return F.acos(F.sin(F.toRadians(F.col(long_x)) * z0 + \
F.cos(F.toRadians(F.col(lat_x))) * z1 * \
F.cos(F.toRadians(F.col(long_x))) - z3\
) * F.lit((6371.0)*(0.621371)))
def dist_1(x,y):
return [dist_2(x,y,c[0],c[1]) for c in lat_lon_list]
When I try to compute the distance I get the following error
data.select('a','b',dist_1('a','b')).show()
TypeErrorTraceback (most recent call last)
<ipython-input-53-8ec09912a7b1> in <module>()
24
25
---> 26 data.select('a','b',dist_1('a','b')).show()
/opt/spark/current/python/pyspark/sql/dataframe.py in select(self,
*cols)
859 [Row(name=u'Alice', age=12), Row(name=u'Bob',
age=15)]
860 """
--> 861 jdf = self._jdf.select(self._jcols(*cols))
862 return DataFrame(jdf, self.sql_ctx)
863
/opt/spark/current/python/pyspark/sql/dataframe.py in _jcols(self,
*cols)
714 if len(cols) == 1 and isinstance(cols[0], list):
715 cols = cols[0]
--> 716 return self._jseq(cols, _to_java_column)
717
718 def _sort_cols(self, cols, kwargs):
/opt/spark/current/python/pyspark/sql/dataframe.py in _jseq(self,
cols, converter)
701 def _jseq(self, cols, converter=None):
702 """Return a JVM Seq of Columns from a list of Column
or names"""
--> 703 return _to_seq(self.sql_ctx._sc, cols, converter)
704
705 def _jmap(self, jm):
/opt/spark/current/python/pyspark/sql/column.py in _to_seq(sc, cols,
converter)
57 """
58 if converter:
---> 59 cols = [converter(c) for c in cols]
60 return sc._jvm.PythonUtils.toSeq(cols)
61
/opt/spark/current/python/pyspark/sql/column.py in
_to_java_column(col)
45 jcol = col._jc
46 else:
---> 47 jcol = _create_column_from_name(col)
48 return jcol
49
/opt/spark/current/python/pyspark/sql/column.py in
_create_column_from_name(name)
38 def _create_column_from_name(name):
39 sc = SparkContext._active_spark_context
---> 40 return sc._jvm.functions.col(name)
41
42
/opt/spark/current/python/lib/py4j-0.10.3-
src.zip/py4j/java_gateway.py in __call__(self, *args)
1122
1123 def __call__(self, *args):
-> 1124 args_command, temp_args = self._build_args(*args)
1125
1126 command = proto.CALL_COMMAND_NAME +\
/opt/spark/current/python/lib/py4j-0.10.3-
src.zip/py4j/java_gateway.py in _build_args(self, *args)
1086 def _build_args(self, *args):
1087 if self.converters is not None and
len(self.converters) > 0:
-> 1088 (new_args, temp_args) = self._get_args(args)
1089 else:
1090 new_args = args
/opt/spark/current/python/lib/py4j-0.10.3-
src.zip/py4j/java_gateway.py in _get_args(self, args)
1073 for converter in
self.gateway_client.converters:
1074 if converter.can_convert(arg):
-> 1075 temp_arg = converter.convert(arg,
self.gateway_client)
1076 temp_args.append(temp_arg)
1077 new_args.append(temp_arg)
/opt/spark/current/python/lib/py4j-0.10.3-
src.zip/py4j/java_collections.py in convert(self, object,
gateway_client)
499 java_list = ArrayList()
500 for element in object:
--> 501 java_list.add(element)
502 return java_list
503
/opt/spark/current/python/lib/py4j-0.10.3-
src.zip/py4j/java_gateway.py in __call__(self, *args)
1122
1123 def __call__(self, *args):
-> 1124 args_command, temp_args = self._build_args(*args)
1125
1126 command = proto.CALL_COMMAND_NAME +\
/opt/spark/current/python/lib/py4j-0.10.3-
src.zip/py4j/java_gateway.py in _build_args(self, *args)
1086 def _build_args(self, *args):
1087 if self.converters is not None and
len(self.converters) > 0:
-> 1088 (new_args, temp_args) = self._get_args(args)
1089 else:
1090 new_args = args
/opt/spark/current/python/lib/py4j-0.10.3-
src.zip/py4j/java_gateway.py in _get_args(self, args)
1073 for converter in
self.gateway_client.converters:
1074 if converter.can_convert(arg):
-> 1075 temp_arg = converter.convert(arg,
self.gateway_client)
1076 temp_args.append(temp_arg)
1077 new_args.append(temp_arg)
/opt/spark/current/python/lib/py4j-0.10.3-
src.zip/py4j/java_collections.py in convert(self, object,
gateway_client)
510 HashMap = JavaClass("java.util.HashMap",
gateway_client)
511 java_map = HashMap()
--> 512 for key in object.keys():
513 java_map[key] = object[key]
514 return java_map
TypeError: 'Column' object is not callable
Any help would be appreciated.
This is because your function returns a list, so select receives that list as a single argument and tries to treat it as a column name. You can unpack:
data.select('a','b', *dist_1('a','b'))
or combine:
data.select(['a','b'] + dist_1('a','b'))

Using the natural language toolkit in Jupyter notebook

Hello, I am trying to use nltk to tokenize and generate some POS tags, but I get an error in spite of importing nltk
bs=BeautifulSoup(web.text, 'html.parser')
print (bs)
tokes=nltk.word_tokenize (bs)
tags= nltk.pos_tag(tokes)
TypeError Traceback (most recent call last)
<ipython-input-71-f1434047d3f5> in <module>()
1 bs=BeautifulSoup(web.text, 'html.parser')
2 print (bs)
----> 3 tokes=nltk.word_tokenize (bs)
4 tags= nltk.pos_tag(tokes)
5 tags
C:\Users\DESDEJEI\Anaconda3\lib\site-packages\nltk\tokenize\__init__.py in word_tokenize(text, language)
104 :param language: the model name in the Punkt corpus
105 """
--> 106 return [token for sent in sent_tokenize(text, language)
107 for token in _treebank_word_tokenize(sent)]
108
C:\Users\DESDEJEI\Anaconda3\lib\site-packages\nltk\tokenize\__init__.py in sent_tokenize(text, language)
89 """
90 tokenizer = load('tokenizers/punkt/{0}.pickle'.format(language))
---> 91 return tokenizer.tokenize(text)
92
93 # Standard word tokenizer.
C:\Users\DESDEJEI\Anaconda3\lib\site-packages\nltk\tokenize\punkt.py in tokenize(self, text, realign_boundaries)
1224 Given a text, returns a list of the sentences in that text.
1225 """
-> 1226 return list(self.sentences_from_text(text, realign_boundaries))
1227
1228 def debug_decisions(self, text):
C:\Users\DESDEJEI\Anaconda3\lib\site-packages\nltk\tokenize\punkt.py in sentences_from_text(self, text, realign_boundaries)
1272 follows the period.
1273 """
-> 1274 return [text[s:e] for s, e in self.span_tokenize(text, realign_boundaries)]
1275
1276 def _slices_from_text(self, text):
C:\Users\DESDEJEI\Anaconda3\lib\site-packages\nltk\tokenize\punkt.py in span_tokenize(self, text, realign_boundaries)
1263 if realign_boundaries:
1264 slices = self._realign_boundaries(text, slices)
-> 1265 return [(sl.start, sl.stop) for sl in slices]
1266
1267 def sentences_from_text(self, text, realign_boundaries=True):
C:\Users\DESDEJEI\Anaconda3\lib\site-packages\nltk\tokenize\punkt.py in <listcomp>(.0)
1263 if realign_boundaries:
1264 slices = self._realign_boundaries(text, slices)
-> 1265 return [(sl.start, sl.stop) for sl in slices]
1266
1267 def sentences_from_text(self, text, realign_boundaries=True):
C:\Users\DESDEJEI\Anaconda3\lib\site-packages\nltk\tokenize\punkt.py in _realign_boundaries(self, text, slices)
1302 """
1303 realign = 0
-> 1304 for sl1, sl2 in _pair_iter(slices):
1305 sl1 = slice(sl1.start + realign, sl1.stop)
1306 if not sl2:
C:\Users\DESDEJEI\Anaconda3\lib\site-packages\nltk\tokenize\punkt.py in _pair_iter(it)
308 """
309 it = iter(it)
--> 310 prev = next(it)
311 for el in it:
312 yield (prev, el)
C:\Users\DESDEJEI\Anaconda3\lib\site-packages\nltk\tokenize\punkt.py in _slices_from_text(self, text)
1276 def _slices_from_text(self, text):
1277 last_break = 0
-> 1278 for match in self._lang_vars.period_context_re().finditer(text):
1279 context = match.group() + match.group('after_tok')
1280 if self.text_contains_sentbreak(context):
TypeError: expected string or bytes-like object
Could anyone help me figure out where exactly I may have gone wrong with my syntax?
You're passing bs, a BeautifulSoup object, to the tokenize function, which expects a string; you should be passing bs.text instead.

Resources