Cannot allocate memory in multiprocessing python - python-3.x

I want to apply my function (f1) to an array of numbers (cdr_test) using multiprocessing. My code:
cdr_test = list(range(100000))


def f1(el):
    """Count the ``(el, member)`` pairs for every group of ``d3`` containing ``el``.

    ``d3`` is a module-level mapping defined elsewhere; only its values are
    read here. For each value that contains ``el``, every pair produced by
    ``itertools.product([el], value)`` is counted once.

    Args:
        el: The element to look up in the values of ``d3``.

    Returns:
        A ``Counter`` keyed by ``(el, member)`` tuples. It is empty when no
        value of ``d3`` contains ``el``.
    """
    pair_counts = Counter()  # make new vector for each cdr
    for members in d3.values():
        if el in members:
            # In-place update gives the same counts as ``a = a + Counter(...)``
            # without building a brand-new Counter on every matching group.
            pair_counts.update(itertools.product([el], members))
    return pair_counts


if __name__ == '__main__':
    # One worker per CPU core: asking for far more processes than cores only
    # costs memory (every worker is a fork of the parent process).
    pool = mp.Pool(mp.cpu_count())
    try:
        results = pool.map(f1, cdr_test)
    finally:
        # Close exactly once and wait for the workers to exit.
        pool.close()
        pool.join()
    # The context manager closes the file even if a write fails.
    with open('out.txt', 'w') as out:
        for result in results:
            for k, v in result.items():
                out.write('\t'.join(map(str, k)) + "\t" + str(v) + "\n")
I get a 'Cannot allocate memory' error. If I use a shorter array (length 100), then everything works.
Stacktrace:
OSError Traceback (most recent call last)
<ipython-input-3-b8dc4a3d12b3> in <module>()
9
10 if __name__ == '__main__':
---> 11 pool = mp.Pool(1000)
12 results = pool.map(f1, cdr_test)
13 #new section
/home/fedorovaad/anaconda3/lib/python3.5/multiprocessing/context.py in Pool(self, processes, initializer, initargs, maxtasksperchild)
116 from .pool import Pool
117 return Pool(processes, initializer, initargs, maxtasksperchild,
--> 118 context=self.get_context())
119
120 def RawValue(self, typecode_or_type, *args):
/home/fedorovaad/anaconda3/lib/python3.5/multiprocessing/pool.py in __init__(self, processes, initializer, initargs, maxtasksperchild, context)
166 self._processes = processes
167 self._pool = []
--> 168 self._repopulate_pool()
169
170 self._worker_handler = threading.Thread(
/home/fedorovaad/anaconda3/lib/python3.5/multiprocessing/pool.py in _repopulate_pool(self)
231 w.name = w.name.replace('Process', 'PoolWorker')
232 w.daemon = True
--> 233 w.start()
234 util.debug('added worker')
235
/home/fedorovaad/anaconda3/lib/python3.5/multiprocessing/process.py in start(self)
103 'daemonic processes are not allowed to have children'
104 _cleanup()
--> 105 self._popen = self._Popen(self)
106 self._sentinel = self._popen.sentinel
107 _children.add(self)
/home/fedorovaad/anaconda3/lib/python3.5/multiprocessing/context.py in _Popen(process_obj)
265 def _Popen(process_obj):
266 from .popen_fork import Popen
--> 267 return Popen(process_obj)
268
269 class SpawnProcess(process.BaseProcess):
/home/fedorovaad/anaconda3/lib/python3.5/multiprocessing/popen_fork.py in __init__(self, process_obj)
18 sys.stderr.flush()
19 self.returncode = None
---> 20 self._launch(process_obj)
21
22 def duplicate_for_child(self, fd):
/home/fedorovaad/anaconda3/lib/python3.5/multiprocessing/popen_fork.py in _launch(self, process_obj)
65 code = 1
66 parent_r, child_w = os.pipe()
---> 67 self.pid = os.fork()
68 if self.pid == 0:
69 try:
OSError: [Errno 12] Cannot allocate memory
Are there ways to solve this?

The code you show is different from the one in the error.
---> 11 pool = mp.Pool(1000)
You are trying to spawn far too many processes; the OS runs out of memory before it can allocate them all.
You don't need that many processes to carry out your job. Use multiprocessing.cpu_count() to find out how many CPUs your platform has and spawn a pool of that size.

Related

jax.lax.fori_loop Abstract tracer value encountered where concrete value is expected

I have a JAX loop that looks like this, where inside the step function I take the min of the two arguments:
import jax
# Loop body handed to jax.lax.fori_loop below: it receives the loop index
# (timestep) and the carried value (order) and returns the new carried value.
def step(timestep: int, order: int = 4) -> int:
# NOTE: the built-in min() is the call that fails in the traceback that follows --
# it ends up calling bool() on a traced value, which raises ConcretizationTypeError.
order = min(timestep + 1, order)
return order
num_steps = 10
order = 100
# Calls step for every index from 0 up to (but not including) num_steps,
# starting from order = 100 and feeding each result into the next call.
order = jax.lax.fori_loop(0, num_steps, step, order)
The above code fails with a jax._src.errors.ConcretizationTypeError. This is the full stacktrace:
WARNING:jax._src.lib.xla_bridge:No GPU/TPU found, falling back to CPU. (Set TF_CPP_MIN_LOG_LEVEL=0 and rerun for more info.)
---------------------------------------------------------------------------
UnfilteredStackTrace Traceback (most recent call last)
<ipython-input-4-9ec280f437cb> in <module>
2 order = 100
----> 3 order = jax.lax.fori_loop(0, num_steps, step, order)
16 frames
/usr/local/lib/python3.8/dist-packages/jax/_src/traceback_util.py in reraise_with_filtered_traceback(*args, **kwargs)
161 try:
--> 162 return fun(*args, **kwargs)
163 except Exception as e:
/usr/local/lib/python3.8/dist-packages/jax/_src/lax/control_flow/loops.py in fori_loop(lower, upper, body_fun, init_val)
1691
-> 1692 (_, result), _ = scan(_fori_scan_body_fun(body_fun), (lower_, init_val),
1693 None, length=upper_ - lower_)
/usr/local/lib/python3.8/dist-packages/jax/_src/traceback_util.py in reraise_with_filtered_traceback(*args, **kwargs)
161 try:
--> 162 return fun(*args, **kwargs)
163 except Exception as e:
/usr/local/lib/python3.8/dist-packages/jax/_src/lax/control_flow/loops.py in scan(f, init, xs, length, reverse, unroll)
258 # necessary, a second time with modified init values.
--> 259 init_flat, carry_avals, carry_avals_out, init_tree, *rest = _create_jaxpr(init)
260 new_init_flat, changed = _promote_weak_typed_inputs(init_flat, carry_avals, carry_avals_out)
/usr/local/lib/python3.8/dist-packages/jax/_src/lax/control_flow/loops.py in _create_jaxpr(init)
244 carry_avals = tuple(_map(_abstractify, init_flat))
--> 245 jaxpr, consts, out_tree = _initial_style_jaxpr(
246 f, in_tree, (*carry_avals, *x_avals), "scan")
/usr/local/lib/python3.8/dist-packages/jax/_src/lax/control_flow/common.py in _initial_style_jaxpr(fun, in_tree, in_avals, primitive_name)
59 primitive_name: Optional[str] = None):
---> 60 jaxpr, consts, out_tree = _initial_style_open_jaxpr(
61 fun, in_tree, in_avals, primitive_name)
/usr/local/lib/python3.8/dist-packages/jax/_src/lax/control_flow/common.py in _initial_style_open_jaxpr(fun, in_tree, in_avals, primitive_name)
53 debug = pe.debug_info(fun, in_tree, False, primitive_name or "<unknown>")
---> 54 jaxpr, _, consts = pe.trace_to_jaxpr_dynamic(wrapped_fun, in_avals, debug)
55 return jaxpr, consts, out_tree()
/usr/local/lib/python3.8/dist-packages/jax/_src/profiler.py in wrapper(*args, **kwargs)
313 with TraceAnnotation(name, **decorator_kwargs):
--> 314 return func(*args, **kwargs)
315 return wrapper
/usr/local/lib/python3.8/dist-packages/jax/interpreters/partial_eval.py in trace_to_jaxpr_dynamic(fun, in_avals, debug_info, keep_inputs)
1980 main.jaxpr_stack = () # type: ignore
-> 1981 jaxpr, out_avals, consts = trace_to_subjaxpr_dynamic(
1982 fun, main, in_avals, keep_inputs=keep_inputs, debug_info=debug_info)
/usr/local/lib/python3.8/dist-packages/jax/interpreters/partial_eval.py in trace_to_subjaxpr_dynamic(fun, main, in_avals, keep_inputs, debug_info)
1997 in_tracers_ = [t for t, keep in zip(in_tracers, keep_inputs) if keep]
-> 1998 ans = fun.call_wrapped(*in_tracers_)
1999 out_tracers = map(trace.full_raise, ans)
/usr/local/lib/python3.8/dist-packages/jax/linear_util.py in call_wrapped(self, *args, **kwargs)
166 try:
--> 167 ans = self.f(*args, **dict(self.params, **kwargs))
168 except:
/usr/local/lib/python3.8/dist-packages/jax/_src/lax/control_flow/loops.py in scanned_fun(loop_carry, _)
1607 i, x = loop_carry
-> 1608 return (i + 1, body_fun()(i, x)), None
1609 return scanned_fun
<ipython-input-2-2e3345899235> in step(timestep, order)
1 def step(timestep: int, order: int = 100) -> int:
----> 2 order = min(timestep + 1, order)
3 return order
/usr/local/lib/python3.8/dist-packages/jax/core.py in __bool__(self)
633 def __nonzero__(self): return self.aval._nonzero(self)
--> 634 def __bool__(self): return self.aval._bool(self)
635 def __int__(self): return self.aval._int(self)
/usr/local/lib/python3.8/dist-packages/jax/core.py in error(self, arg)
1266 def error(self, arg):
-> 1267 raise ConcretizationTypeError(arg, fname_context)
1268 return error
UnfilteredStackTrace: jax._src.errors.ConcretizationTypeError: Abstract tracer value encountered where concrete value is expected: Traced<ShapedArray(bool[], weak_type=True)>with<DynamicJaxprTrace(level=1/0)>
The problem arose with the `bool` function.
The error occurred while tracing the function scanned_fun at /usr/local/lib/python3.8/dist-packages/jax/_src/lax/control_flow/loops.py:1606 for scan. This concrete value was not available in Python because it depends on the values of the argument 'loop_carry'.
See https://jax.readthedocs.io/en/latest/errors.html#jax.errors.ConcretizationTypeError
The stack trace below excludes JAX-internal frames.
The preceding is the original exception that occurred, unmodified.
--------------------
The above exception was the direct cause of the following exception:
ConcretizationTypeError Traceback (most recent call last)
<ipython-input-4-9ec280f437cb> in <module>
1 num_steps = 10
2 order = 100
----> 3 order = jax.lax.fori_loop(0, num_steps, step, order)
<ipython-input-2-2e3345899235> in step(timestep, order)
1 def step(timestep: int, order: int = 100) -> int:
----> 2 order = min(timestep + 1, order)
3 return order
ConcretizationTypeError: Abstract tracer value encountered where concrete value is expected: Traced<ShapedArray(bool[], weak_type=True)>with<DynamicJaxprTrace(level=1/0)>
The problem arose with the `bool` function.
The error occurred while tracing the function scanned_fun at /usr/local/lib/python3.8/dist-packages/jax/_src/lax/control_flow/loops.py:1606 for scan. This concrete value was not available in Python because it depends on the values of the argument 'loop_carry'.
See https://jax.readthedocs.io/en/latest/errors.html#jax.errors.ConcretizationTypeError
Everything works fine if I use a simple Python loop instead of jax.lax.fori_loop, but then my original code ends up very slow. How can I fix this issue?
Use jax.numpy.minimum in place of min:
def step(timestep: int, order: int = 4) -> int:
    """Return the smaller of ``timestep + 1`` and ``order``.

    ``jax.numpy.minimum`` is used instead of the built-in ``min`` so that the
    comparison also works when JAX passes abstract tracers in place of
    concrete numbers (for example when this is the body of
    ``jax.lax.fori_loop``).

    Args:
        timestep: Current loop index.
        order: Value carried from the previous iteration.

    Returns:
        The element-wise minimum of ``timestep + 1`` and ``order`` as
        produced by ``jax.numpy.minimum``.
    """
    order = jax.numpy.minimum(timestep + 1, order)
    return order
The reason min does not work is that in the course of executing code within jit, grad, vmap, fori_loop, etc., JAX replaces concrete values with abstract tracers, and Python functions like min don't know how to handle these abstract values. See How to Think in JAX for more background on this.

Why Exception Handling doesn't print text?

My question is why Python doesn't execute the print statement in the exception-handling code below. I am trying to calculate the log of volumes for a number of stocks. Each stock has 1259 volume values. But Python generates a RuntimeWarning, "divide by zero encountered in log". So I tried to use exception handling to locate where the log input is zero, but Python doesn't execute the print statement under except. The print statement is supposed to print the name of the stock and the index in the array where the volume is zero. Why?
Here is the code:
# Walk through every stock column and take the log of each volume one at a time,
# trying to report the (stock, index) positions where the log fails.
for i, stock in enumerate(df.columns):
volumes = df[stock].to_numpy()
for r in range(len(volumes)): # len(volumes) = 1259
try:
v = np.log(volumes[r])
# NOTE(review): this handler only runs if np.log raises. The question reports a
# RuntimeWarning ("divide by zero encountered in log"), i.e. a warning rather than
# an exception -- presumably that is why nothing is printed; confirm.
except:
print(stock, r)
Here is the Error that follows after the RunTimeWarning.
LinAlgError Traceback (most recent call last)
<ipython-input-6-6aa283671e2c> in <module>
13 closes = df_close[stock].to_numpy()
14 volumes = df_vol[stock].to_numpy()
---> 15 indicator_values_all_stocks[i] = indicator.price_volume_fit(volumes, closes, histLength)
16
17 indicator_values_all_stocks_no_NaN = indicator_values_all_stocks[:, ~np.isnan(indicator_values_all_stocks).any(axis=0)]
~\Desktop\Python Projects Organized\Finance\Indicator Statistics\B.57. Price Volume Fit\indicator.py in price_volume_fit(volumes, closes, histLength)
1259 x = log_volumes[i - histLength:i]
1260 y = log_prices[i - histLength:i]
-> 1261 model = np.polyfit(x, y, 1, full = True)
1262 slope[i] = model[0][0]
1263
<__array_function__ internals> in polyfit(*args, **kwargs)
c:\users\donald seger\miniconda3\envs\tensorflow\lib\site-packages\numpy\lib\polynomial.py in polyfit(x, y, deg, rcond, full, w, cov)
629 scale = NX.sqrt((lhs*lhs).sum(axis=0))
630 lhs /= scale
--> 631 c, resids, rank, s = lstsq(lhs, rhs, rcond)
632 c = (c.T/scale).T # broadcast scale coefficients
633
<__array_function__ internals> in lstsq(*args, **kwargs)
c:\users\donald seger\miniconda3\envs\tensorflow\lib\site-packages\numpy\linalg\linalg.py in lstsq(a, b, rcond)
2257 # lapack can't handle n_rhs = 0 - so allocate the array one larger in that axis
2258 b = zeros(b.shape[:-2] + (m, n_rhs + 1), dtype=b.dtype)
-> 2259 x, resids, rank, s = gufunc(a, b, rcond, signature=signature, extobj=extobj)
2260 if m == 0:
2261 x[...] = 0
c:\users\donald seger\miniconda3\envs\tensorflow\lib\site-packages\numpy\linalg\linalg.py in _raise_linalgerror_lstsq(err, flag)
107
108 def _raise_linalgerror_lstsq(err, flag):
--> 109 raise LinAlgError("SVD did not converge in Linear Least Squares")
110
111 def get_linalg_error_extobj(callback):
LinAlgError: SVD did not converge in Linear Least Squares

" ArityMismatch: Adding expressions with non-matching form arguments () vs ('v_1',) " using FEniCS

I want to solve a continuum mechanics problem using FEniCS. I apply a pressure and take the weight into account. But when I add the thermoelasticity component, it no longer works.
Here is my code :
from dolfin import *
from fenics import *
from ufl import nabla_div
from ufl import as_tensor
import matplotlib.pyplot as plt
import numpy as np
# Material and geometry parameters. mu and lambda_ are computed from E and nu
# with the usual Lame-parameter formulas; gamma and beta are not referenced
# again in this snippet.
E = Constant(100*10**9)
nu = Constant(0.3)
Lg = 0.01; W = 0.2
mu = E/(2+2*nu)
rho = Constant(2200)
delta = W/Lg
gamma = 0.4*delta**2
beta = 8
lambda_ = (E*nu)/((1+nu)*(1-2*nu))
alpha = 1.2*(10**(-8))
deltaT = Constant(50)
# NOTE(review): `alph` is not defined anywhere in this snippet; the coefficient
# defined two lines above is named `alpha` -- this looks like a typo.
Kt = E*alph*deltaT/(1-2*nu)
g = 9.81
tol = 1E-14
# Create mesh and define function space
mesh = RectangleMesh(Point(-2., 0.),Point(2., 10.), 80, 200)
V = VectorFunctionSpace(mesh, "P", 1)
# Define boundary condition
# Boundary points with x[1] below tol (the bottom edge of the rectangle).
def clamped_boundary(x, on_boundary):
return on_boundary and x[1] < tol
# Boundary points with x[1] above 10 - tol (the top edge of the rectangle).
class UpFace(SubDomain):
def inside(self, x, on_boundary):
return on_boundary and (x[1] > 10 - tol)
ueN = UpFace()
# Facet markers: everything starts at 0, the top edge is marked 1 so that
# ds(1) below integrates over that edge only.
boundaries = MeshFunction("size_t", mesh, mesh.topology().dim()-1, 0)
ueN.mark(boundaries, 1)
ds = Measure("ds", domain=mesh, subdomain_data=boundaries)
bc = DirichletBC(V, Constant((0, 0)), clamped_boundary)
# Symmetric part of the gradient of u.
def epsilon(u):
return 0.5*(nabla_grad(u) + nabla_grad(u).T)
# Stress expression. `d` is read from module scope and is only assigned further
# down, so sigma must not be called before that assignment.
# NOTE: the -Kt*Identity(d) part does not contain u. According to the answer
# that follows, this u-independent term inside the bilinear form `a` is what
# triggers the ArityMismatch error.
def sigma(u):
return (lambda_*nabla_div(u) - Kt)*Identity(d) + (2*mu)*epsilon(u)
# Define variational problem
u = TrialFunction(V)
d = u.geometric_dimension() # space dimension
v = TestFunction(V)
f = Constant((0,-rho*g))
T = Constant((0, 0))
Pr = Constant((0, -2*10**9))
a = inner(sigma(u), epsilon(v))*dx
L = dot(f, v)*dx + dot(T, v)*ds + dot(Pr,v)*ds(1)
# Compute solution
# `u` is rebound here from the TrialFunction to the Function that receives the solution.
u = Function(V)
solve(a == L, u, bc)
# Plot solution
plot(u, mode="displacement", color= "red")
plt.colorbar(plot(u))
I get this error message :
---------------------------------------------------------------------------
ArityMismatch Traceback (most recent call last)
<ipython-input-54-805d7c5b704f> in <module>
17 # Compute solution
18 u = Function(V)
---> 19 solve(a == L, u, bc)
20
21 # Plot solution
/usr/lib/python3/dist-packages/dolfin/fem/solving.py in solve(*args, **kwargs)
218 # tolerance)
219 elif isinstance(args[0], ufl.classes.Equation):
--> 220 _solve_varproblem(*args, **kwargs)
221
222 # Default case, just call the wrapped C++ solve function
/usr/lib/python3/dist-packages/dolfin/fem/solving.py in _solve_varproblem(*args, **kwargs)
240 # Create problem
241 problem = LinearVariationalProblem(eq.lhs, eq.rhs, u, bcs,
--> 242 form_compiler_parameters=form_compiler_parameters)
243
244 # Create solver and call solve
/usr/lib/python3/dist-packages/dolfin/fem/problem.py in __init__(self, a, L, u, bcs, form_compiler_parameters)
54 else:
55 L = Form(L, form_compiler_parameters=form_compiler_parameters)
---> 56 a = Form(a, form_compiler_parameters=form_compiler_parameters)
57
58 # Initialize C++ base class
/usr/lib/python3/dist-packages/dolfin/fem/form.py in __init__(self, form, **kwargs)
42
43 ufc_form = ffc_jit(form, form_compiler_parameters=form_compiler_parameters,
---> 44 mpi_comm=mesh.mpi_comm())
45 ufc_form = cpp.fem.make_ufc_form(ufc_form[0])
46
/usr/lib/python3/dist-packages/dolfin/jit/jit.py in mpi_jit(*args, **kwargs)
45 # Just call JIT compiler when running in serial
46 if MPI.size(mpi_comm) == 1:
---> 47 return local_jit(*args, **kwargs)
48
49 # Default status (0 == ok, 1 == fail)
/usr/lib/python3/dist-packages/dolfin/jit/jit.py in ffc_jit(ufl_form, form_compiler_parameters)
95 p.update(dict(parameters["form_compiler"]))
96 p.update(form_compiler_parameters or {})
---> 97 return ffc.jit(ufl_form, parameters=p)
98
99
/usr/lib/python3/dist-packages/ffc/jitcompiler.py in jit(ufl_object, parameters, indirect)
215
216 # Inspect cache and generate+build if necessary
--> 217 module = jit_build(ufl_object, module_name, parameters)
218
219 # Raise exception on failure to build or import module
/usr/lib/python3/dist-packages/ffc/jitcompiler.py in jit_build(ufl_object, module_name, parameters)
131 name=module_name,
132 params=params,
--> 133 generate=jit_generate)
134 return module
135
/usr/lib/python3/dist-packages/dijitso/jit.py in jit(jitable, name, params, generate, send, receive, wait)
163 elif generate:
164 # 1) Generate source code
--> 165 header, source, dependencies = generate(jitable, name, signature, params["generator"])
166 # Ensure we got unicode from generate
167 header = as_unicode(header)
/usr/lib/python3/dist-packages/ffc/jitcompiler.py in jit_generate(ufl_object, module_name, signature, parameters)
64
65 code_h, code_c, dependent_ufl_objects = compile_object(ufl_object,
---> 66 prefix=module_name, parameters=parameters, jit=True)
67
68 # Jit compile dependent objects separately,
/usr/lib/python3/dist-packages/ffc/compiler.py in compile_form(forms, object_names, prefix, parameters, jit)
141 """This function generates UFC code for a given UFL form or list of UFL forms."""
142 return compile_ufl_objects(forms, "form", object_names,
--> 143 prefix, parameters, jit)
144
145
/usr/lib/python3/dist-packages/ffc/compiler.py in compile_ufl_objects(ufl_objects, kind, object_names, prefix, parameters, jit)
183 # Stage 1: analysis
184 cpu_time = time()
--> 185 analysis = analyze_ufl_objects(ufl_objects, kind, parameters)
186 _print_timing(1, time() - cpu_time)
187
/usr/lib/python3/dist-packages/ffc/analysis.py in analyze_ufl_objects(ufl_objects, kind, parameters)
88 # Analyze forms
89 form_datas = tuple(_analyze_form(form, parameters)
---> 90 for form in forms)
91
92 # Extract unique elements accross all forms
/usr/lib/python3/dist-packages/ffc/analysis.py in <genexpr>(.0)
88 # Analyze forms
89 form_datas = tuple(_analyze_form(form, parameters)
---> 90 for form in forms)
91
92 # Extract unique elements accross all forms
/usr/lib/python3/dist-packages/ffc/analysis.py in _analyze_form(form, parameters)
172 do_apply_geometry_lowering=True,
173 preserve_geometry_types=(Jacobian,),
--> 174 do_apply_restrictions=True)
175 elif r == "tsfc":
176 try:
/usr/lib/python3/dist-packages/ufl/algorithms/compute_form_data.py in compute_form_data(form, do_apply_function_pullbacks, do_apply_integral_scaling, do_apply_geometry_lowering, preserve_geometry_types, do_apply_default_restrictions, do_apply_restrictions, do_estimate_degrees, do_append_everywhere_integrals, complex_mode)
416 preprocessed_form = remove_complex_nodes(preprocessed_form)
417
--> 418 check_form_arity(preprocessed_form, self.original_form.arguments(), complex_mode) # Currently testing how fast this is
419
420 # TODO: This member is used by unit tests, change the tests to
/usr/lib/python3/dist-packages/ufl/algorithms/check_arities.py in check_form_arity(form, arguments, complex_mode)
175 def check_form_arity(form, arguments, complex_mode=False):
176 for itg in form.integrals():
--> 177 check_integrand_arity(itg.integrand(), arguments, complex_mode)
/usr/lib/python3/dist-packages/ufl/algorithms/check_arities.py in check_integrand_arity(expr, arguments, complex_mode)
157 key=lambda x: (x.number(), x.part())))
158 rules = ArityChecker(arguments)
--> 159 arg_tuples = map_expr_dag(rules, expr, compress=False)
160 args = tuple(a[0] for a in arg_tuples)
161 if args != arguments:
/usr/lib/python3/dist-packages/ufl/corealg/map_dag.py in map_expr_dag(function, expression, compress)
35 Return the result of the final function call.
36 """
---> 37 result, = map_expr_dags(function, [expression], compress=compress)
38 return result
39
/usr/lib/python3/dist-packages/ufl/corealg/map_dag.py in map_expr_dags(function, expressions, compress)
84 r = handlers[v._ufl_typecode_](v)
85 else:
---> 86 r = handlers[v._ufl_typecode_](v, *[vcache[u] for u in v.ufl_operands])
87
88 # Optionally check if r is in rcache, a memory optimization
/usr/lib/python3/dist-packages/ufl/algorithms/check_arities.py in sum(self, o, a, b)
46 def sum(self, o, a, b):
47 if a != b:
---> 48 raise ArityMismatch("Adding expressions with non-matching form arguments {0} vs {1}.".format(_afmt(a), _afmt(b)))
49 return a
50
ArityMismatch: Adding expressions with non-matching form arguments () vs ('v_1',).
When I write this (I remove the Kt from sigma(u)):
# Stress expression without the Kt term: both remaining terms are built from u.
def sigma(u):
return (lambda_*nabla_div(u))*Identity(d) + (2*mu)*epsilon(u)
It works perfectly.
On this page (Click here), they solve the same kind of problem, and that script works on my computer.
Do you know how to fix it ?
I had exactly the same question and a colleague of mine did figure it out for me. As there is no answer given here, I will try to leave some directions to guide others to the solution. I have not a lot of expertise yet, so please consider that my use of terminology might be a little bit off.
The FEniCS error somewhat misled me into thinking that the problem is in the definition of the stress term sigma. It is not exactly there. The right-hand side and the left-hand side passed to the solve function are not defined correctly (this is also shown at the very top of the traceback). The term Kt*Identity(d) in the stress function sigma does not depend on the trial function u. It is only multiplied by the test function v later (via epsilon(v)). Therefore it has to go into the L of the equation passed to the solver.
On the page you linked, the script uses the rhs and lhs functions to correctly split the equation into a and L.

Overflow when unpacking long - Pytorch

I am running the following code
import torch
from __future__ import print_function
# Creates a 5x3 tensor whose memory is left uninitialised (see the answer that follows).
x = torch.empty(5, 3)
# Printing formats the tensor's values; this is the call that raises in the traceback.
print(x)
on an Ubuntu machine in CPU mode, which gives me the following error. What is the reason, and how can I fix it?
x = torch.empty(5, 3)
----> print(x)
/usr/local/lib/python3.6/dist-packages/torch/tensor.py in __repr__(self)
55 # characters to replace unicode characters with.
56 if sys.version_info > (3,):
---> 57 return torch._tensor_str._str(self)
58 else:
59 if hasattr(sys.stdout, 'encoding'):
/usr/local/lib/python3.6/dist-packages/torch/_tensor_str.py in _str(self)
216 suffix = ', dtype=' + str(self.dtype) + suffix
217
--> 218 fmt, scale, sz = _number_format(self)
219 if scale != 1:
220 prefix = prefix + SCALE_FORMAT.format(scale) + ' ' * indent
/usr/local/lib/python3.6/dist-packages/torch/_tensor_str.py in _number_format(tensor, min_sz)
94 # TODO: use fmod?
95 for value in tensor:
---> 96 if value != math.ceil(value.item()):
97 int_mode = False
98 break
RuntimeError: Overflow when unpacking long
Since torch.empty() returns uninitialized memory, you may or may not get a very large value from it. Try
# Same 5x3 shape, but created with torch.rand instead of torch.empty, so the
# values are initialised before they are printed.
x = torch.rand(5, 3)
print(x)
this would give the response.

compute distance for data frame columns from python list

I have a dataframe
# Three rows of two numbers each, stored in columns 'a' and 'b'
# (the question text describes each row as a lat/lon pair).
data = sqlContext.createDataFrame(
    [
        [33.603699, -83.967819],
        [43.609422, -84.188726],
        [40.751800537, -74.066200256],
    ],
    ['a', 'b'],
)
and I have a list of lat/lon pairs. For each lat/lon pair in the data I want to compute the distance to each lat/lon
pair in the list. I am using code from this answer as my distance function:
How to sum distances between data points in a dataset using (Py)Spark?
# Reference points to measure against; the question text describes each entry as a lat/lon pair.
lat_lon_list=[[26.145677, -80.120355],[26.179337, -80.25151600000001],[26.188919, -98.21469499999999], [26.641769, -81.875031]]
# Builds a Spark Column expression from two column names (long_x, lat_x) and two
# plain Python numbers (long_y, lat_y); the numeric side is pre-computed with numpy.
# NOTE(review): as parenthesised, F.sin wraps the whole sum, and its result is
# multiplied by the F.lit constant *inside* F.acos. The first term also uses
# long_x where lat_x might be expected. This does not look like the usual
# great-circle formula -- verify against the answer it was copied from.
def dist_2(long_x, lat_x, long_y, lat_y):
z0=np.sin(np.radians(lat_y))
z1=np.cos(np.radians(lat_y))
z3=np.radians(long_y)
return F.acos(F.sin(F.toRadians(F.col(long_x)) * z0 + \
F.cos(F.toRadians(F.col(lat_x))) * z1 * \
F.cos(F.toRadians(F.col(long_x))) - z3\
) * F.lit((6371.0)*(0.621371)))
# Returns a Python list holding one dist_2 Column per entry of lat_lon_list.
# NOTE(review): c[0] and c[1] are passed as long_y and lat_y, although the list is
# described as lat/lon pairs -- confirm the intended argument order.
def dist_1(x,y):
return [dist_2(x,y,c[0],c[1]) for c in lat_lon_list]
When I try to compute the distance I get the following error:
data.select('a','b',dist_1('a','b')).show()
TypeErrorTraceback (most recent call last)
<ipython-input-53-8ec09912a7b1> in <module>()
24
25
---> 26 data.select('a','b',dist_1('a','b')).show()
/opt/spark/current/python/pyspark/sql/dataframe.py in select(self,
*cols)
859 [Row(name=u'Alice', age=12), Row(name=u'Bob',
age=15)]
860 """
--> 861 jdf = self._jdf.select(self._jcols(*cols))
862 return DataFrame(jdf, self.sql_ctx)
863
/opt/spark/current/python/pyspark/sql/dataframe.py in _jcols(self,
*cols)
714 if len(cols) == 1 and isinstance(cols[0], list):
715 cols = cols[0]
--> 716 return self._jseq(cols, _to_java_column)
717
718 def _sort_cols(self, cols, kwargs):
/opt/spark/current/python/pyspark/sql/dataframe.py in _jseq(self,
cols, converter)
701 def _jseq(self, cols, converter=None):
702 """Return a JVM Seq of Columns from a list of Column
or names"""
--> 703 return _to_seq(self.sql_ctx._sc, cols, converter)
704
705 def _jmap(self, jm):
/opt/spark/current/python/pyspark/sql/column.py in _to_seq(sc, cols,
converter)
57 """
58 if converter:
---> 59 cols = [converter(c) for c in cols]
60 return sc._jvm.PythonUtils.toSeq(cols)
61
/opt/spark/current/python/pyspark/sql/column.py in
_to_java_column(col)
45 jcol = col._jc
46 else:
---> 47 jcol = _create_column_from_name(col)
48 return jcol
49
/opt/spark/current/python/pyspark/sql/column.py in
_create_column_from_name(name)
38 def _create_column_from_name(name):
39 sc = SparkContext._active_spark_context
---> 40 return sc._jvm.functions.col(name)
41
42
/opt/spark/current/python/lib/py4j-0.10.3-
src.zip/py4j/java_gateway.py in __call__(self, *args)
1122
1123 def __call__(self, *args):
-> 1124 args_command, temp_args = self._build_args(*args)
1125
1126 command = proto.CALL_COMMAND_NAME +\
/opt/spark/current/python/lib/py4j-0.10.3-
src.zip/py4j/java_gateway.py in _build_args(self, *args)
1086 def _build_args(self, *args):
1087 if self.converters is not None and
len(self.converters) > 0:
-> 1088 (new_args, temp_args) = self._get_args(args)
1089 else:
1090 new_args = args
/opt/spark/current/python/lib/py4j-0.10.3-
src.zip/py4j/java_gateway.py in _get_args(self, args)
1073 for converter in
self.gateway_client.converters:
1074 if converter.can_convert(arg):
-> 1075 temp_arg = converter.convert(arg,
self.gateway_client)
1076 temp_args.append(temp_arg)
1077 new_args.append(temp_arg)
/opt/spark/current/python/lib/py4j-0.10.3-
src.zip/py4j/java_collections.py in convert(self, object,
gateway_client)
499 java_list = ArrayList()
500 for element in object:
--> 501 java_list.add(element)
502 return java_list
503
/opt/spark/current/python/lib/py4j-0.10.3-
src.zip/py4j/java_gateway.py in __call__(self, *args)
1122
1123 def __call__(self, *args):
-> 1124 args_command, temp_args = self._build_args(*args)
1125
1126 command = proto.CALL_COMMAND_NAME +\
/opt/spark/current/python/lib/py4j-0.10.3-
src.zip/py4j/java_gateway.py in _build_args(self, *args)
1086 def _build_args(self, *args):
1087 if self.converters is not None and
len(self.converters) > 0:
-> 1088 (new_args, temp_args) = self._get_args(args)
1089 else:
1090 new_args = args
/opt/spark/current/python/lib/py4j-0.10.3-
src.zip/py4j/java_gateway.py in _get_args(self, args)
1073 for converter in
self.gateway_client.converters:
1074 if converter.can_convert(arg):
-> 1075 temp_arg = converter.convert(arg,
self.gateway_client)
1076 temp_args.append(temp_arg)
1077 new_args.append(temp_arg)
/opt/spark/current/python/lib/py4j-0.10.3-
src.zip/py4j/java_collections.py in convert(self, object,
gateway_client)
510 HashMap = JavaClass("java.util.HashMap",
gateway_client)
511 java_map = HashMap()
--> 512 for key in object.keys():
513 java_map[key] = object[key]
514 return java_map
TypeError: 'Column' object is not callable
Any help would be appreciated.
This is because your function returns a list. You can unpack:
data.select('a','b', *dist_1('a','b'))
or combine:
data.select(['a','b'] + dist_1('a','b'))

Resources