Does pathos.multiprocessing have starmap? - python-3.x

I got an error when executing code below. The problem seems to be map doesn't support functions taking multiple inputs, just as in the python built-in multiprocessing package. But in the built-in package, there is a starmap that solves this issue. Does pathos.multiprocessing have the same?
import pathos.multiprocessing as mp
class Bar:
def foo(self, name):
return len(str(name))
def boo(self, x, y, z):
sum = self.foo(x)
sum += self.foo(y)
sum += self.foo(z)
return sum
if __name__ == '__main__':
b = Bar()
pool = mp.ProcessingPool()
results = pool.map(b.boo, [(12, 3, 456), (8, 9, 10), ('a', 'b', 'cde')])
print(results)
TypeError: boo() missing 2 required positional arguments: 'y' and 'z'
Update for lambda expression as suggested (didn't work):
if __name__ == '__main__':
b = Bar()
pool = mp.ProcessingPool()
results = pool.map(lambda x: b.boo(*x), [(12, 3, 456), (8, 9, 10), ('a', 'b', 'cde')])
print(results)
multiprocess.pool.RemoteTraceback:
"""
Traceback (most recent call last):
File
"C:\Users\yg451\Anaconda3\lib\site-packages\multiprocess\pool.py",
line 121, in worker
result = (True, func(*args, **kwds))
File
"C:\Users\yg451\Anaconda3\lib\site-packages\multiprocess\pool.py",
line 44, in mapstar
return list(map(*args))
File
"C:\Users\yg451\Anaconda3\lib\site-packages\pathos\helpers\mp_helper.py",
line 15, in
func = lambda args: f(*args)
File "C:/Users/yg451/Code/foo/Machine
Learning/xPype/test/scratch.py", line 18, in
results = pool.map(lambda x: b.boo(*x), [(12, 3, 456), (8, 9, 10), ('a', 'b', 'cde')])
NameError: name 'b' is not defined
"""

I'm the pathos author. pathos is older than starmap, and doesn't really need it. It solved multiple arguments in a pool exactly the same way that the built-in map does.
>>> import pathos.multiprocessing as mp
>>> class Bar:
... def foo(self, name):
... return len(str(name))
... def boo(self, x, y, z):
... sum = self.foo(x)
... sum += self.foo(y)
... sum += self.foo(z)
... return sum
...
>>> b = Bar()
>>> pool = mp.ProcessingPool()
>>> f = lambda x: b.boo(*x)
>>> results = pool.map(f, [(12, 3, 456), (8, 9, 10), ('a', 'b', 'cde')])
>>> results
[6, 4, 5]
>>> results = pool.map(b.boo, [12, 9, 'a'], [3, 9, 'b'], [456, 10, 'cde'])
>>> results
[6, 4, 5]
>>> results = map(b.boo, [12, 9, 'a'], [3, 9, 'b'], [456, 10, 'cde'])
>>> list(results)
[6, 4, 5]
>>>
So, essentially, starmap is unnecessary. However, as it's been recently added to the standard Pool interface in multiprocessing in certain versions of python, it probably should be more prominent in pathos. Note that it already is possible to get to an "augmented" version of starmap from pathos if you like.
>>> import pathos
>>> mp = pathos.helpers.mp
>>> p = mp.Pool()
>>> p.starmap
<bound method Pool.starmap of <multiprocess.pool.Pool object at 0x1038684e0>>
>>>

Related

Python: from list group by elements after a specific trigger element [duplicate]

This question already has answers here:
Python3: How can I split a list based on condition?
(2 answers)
Closed last year.
I have a list like
a=['a',2,'[abcd]','bb',4,5,'kk','[efgh]',6,7,'no','[ijkl]',4,5,'lo']
So here we want group by after each '[]'
so the expected one would be
[['a',2],{'abcd': ['bb',4,5,'kk']},{'efgh': [6,7,'no']},{'ijkl': [4,5,'lo']}]
Any help would be appriciable
You can use groupby:
from itertools import groupby
a=['a',2,'[abcd]','bb',4,5,'kk','[efgh]',6,7,'no','[ijkl]',4,5,'lo']
def group_by_tag(li):
def tag_counter(x):
if isinstance(x, str) and x.startswith('[') and x.endswith(']'):
tag_counter.cnt += 1
return tag_counter.cnt
tag_counter.cnt = 0
return groupby(li, key=tag_counter)
Which you can use to make a list of tuples for each segment partitioned by the [tag]:
>>> x=[(k,list(l)) for k, l in group_by_tag(a)]
>>> x
[(0, ['a', 2]), (1, ['[abcd]', 'bb', 4, 5, 'kk']), (2, ['[efgh]', 6, 7, 'no']), (3, ['[ijkl]', 4, 5, 'lo'])]
And then create your desired mixed-type list from that:
>>> [v if k==0 else {v[0].strip('[]'):v[1:]} for k,v in x]
[['a', 2], {'abcd': ['bb', 4, 5, 'kk']}, {'efgh': [6, 7, 'no']}, {'ijkl': [4, 5, 'lo']}]
But consider that it is usually better to have a list of the same type of object to make processing that list easier.
If you want that, you could do:
>>> [{'no_tag':v} if k==0 else {v[0].strip('[]'):v[1:]} for k,v in x]
[{'no_tag': ['a', 2]}, {'abcd': ['bb', 4, 5, 'kk']}, {'efgh': [6, 7, 'no']}, {'ijkl': [4, 5, 'lo']}]
By "=>" I assume you want a dictionary if there is a preceding keyword and key be the word enclosed within the square brackets (if that's not what you intended feel free to comment and I'll edit this post)
import re
def sort(iterable):
result = {}
governing_match = None
for each in list(iterable):
match = re.findall(r"\[.{1,}\]", str(each))
if len(match) > 0:
governing_match = match[0][1:-1]
result[governing_match] = []
continue
result[governing_match].append(each)
return result
a=['[foo]', 'a' , 2 ,'[abcd]','bb',4,5,'kk','[efgh]',6,7,'no','[ijkl]',4,5,'lo']
for (k, v) in sort(a).items():
print(f"{k} : {v}")
Result :
foo : ['a', 2]
abcd : ['bb', 4, 5, 'kk']
efgh : [6, 7, 'no']
ijkl : [4, 5, 'lo']
The limitation of this is that every sequence should start with an element that is enclosed within square brackets.
You can use pairwise for that:
from itertools import pairwise
a=['a',2,'[abcd]','bb',4,5,'kk','[efgh]',6,7,'no','[ijkl]',4,5,'lo']
bracket_indices = [i for i, x in enumerate(a) if isinstance(x, str)
and x.startswith('[') and x.endswith(']')]
bracket_indices.append(len(a))
output = [a[:bracket_indices[0]]] # First item is special cased
output.extend({a[i][1:-1]: a[i+1:j]} for i, j in pairwise(bracket_indices))

Using a shorter list as values

Hello I am trying to use a shorter list as a value in a dictionary can anyone please help?
string = input("Input DNA Sequence: ")
sequence = [string[e:e+3] for e in range(0, len(string), 3)]
p_residue = list("WYS")
Input: TGGTACTCTTTCTTCACA
Output: {TGG:W,TAC:Y,TCT:S,TTC:W,TTC:Y,ACA:S}
I've tried cycle but I can't seem to make it work.
You can use cycle if you zip p_residue onto your sequence.
from itertools import cycle
def split_str_every(n, seq):
return [seq[i:i+n] for i in range(0, len(seq), n)]
def combine(seq, residue):
return zip(sequence, cycle(p_residue))
sequence = split_str_every(3, "TGGTACTCTTTCTTCACA")
p_residue = ["W", "Y", "S"]
out = combine(sequence, p_residue)
print(dict(out))
Gives:
{'TGG': 'W', 'TAC': 'Y', 'TCT': 'S', 'TTC': 'Y', 'ACA': 'S'}
Which, as you can see, dictionaries don't allow duplicate keys by definition. We can use defaultdics to circumvent this problem. To fix this, we import defaultdict and redefine our combine function:
from collections import defaultdict
def combine(seq, residue):
zipped = zip(sequence, cycle(p_residue))
ddict = defaultdict(list)
for k, v in zipped:
ddict[k].append(v)
return dict(ddict.items())
print(combine(sequence, p_residue))
Now gives the correct answer. Notice that the key TTC stores a list containing both Y & W:
{'TGG': ['W'], 'TAC': ['Y'], 'TCT': ['S'], 'TTC': ['W', 'Y'], 'ACA': ['S']}
Use
sequence = {
string[e:e+3]: p_residue[(e//3) % len(p_residue)]
for e in range(0, len(string), 3)
}
Output
{'TGG': 'W', 'TAC': 'Y', 'TCT': 'S', 'TTC': 'Y', 'ACA': 'S'}
To understand better,
e is one of [0, 3, 6, 9, 12, 15]
e//3 is integer division, so, [0, 1, 2, 3, 4, 5]
(e//3) % 3 is to keep it to residue length, so, [0, 1, 2, 0, 1, 2]
This mathematical approach induces a cycle

flatten a nested list with indices in python

I have a list ['','','',['',[['a','b']['c']]],[[['a','b'],['c']]],[[['d']]]]
I want to flatten the list with indices and the output should be as follows:
flat list=['','','','','a','b','c','a','b','c','d']
indices=[0,1,2,3,3,3,3,4,4,4,5]
How to do this?
I have tried this:
def flat(nums):
res = []
index = []
for i in range(len(nums)):
if isinstance(nums[i], list):
res.extend(nums[i])
index.extend([i]*len(nums[i]))
else:
res.append(nums[i])
index.append(i)
return res,index
But this doesn't work as expected.
TL;DR
This implementation handles nested iterables with unbounded depth:
def enumerate_items_from(iterable):
cursor_stack = [iter(iterable)]
item_index = -1
while cursor_stack:
sub_iterable = cursor_stack[-1]
try:
item = next(sub_iterable)
except StopIteration:
cursor_stack.pop()
continue
if len(cursor_stack) == 1:
item_index += 1
if not isinstance(item, str):
try:
cursor_stack.append(iter(item))
continue
except TypeError:
pass
yield item, item_index
def flat(iterable):
return map(list, zip(*enumerate_items_from(a)))
Which can be used to produce the desired output:
>>> nested = ['', '', '', ['', [['a', 'b'], ['c']]], [[['a', 'b'], ['c']]], [[['d']]]]
>>> flat_list, item_indexes = flat(nested)
>>> print(item_indexes)
[0, 1, 2, 3, 3, 3, 3, 4, 4, 4, 5]
>>> print(flat_list)
['', '', '', '', 'a', 'b', 'c', 'a', 'b', 'c', 'd']
Note that you should probably put the index first to mimic what enumerate does. It would be easier to use for people that already know enumerate.
Important remark unless you are certain your lists will not be nested too much, you shouldn't use any recursion-based solution. Otherwise as soon as you'll have a nested list with depth greater than 1000, your code will crash. I explain this here. Note that a simple call to str(list) will crash on a test case with depth > 1000 (for some python implementations it's more than that, but it's always bounded). The typical exception you'll have when using recursion-based solutions is (this in short is due to how python call stack works):
RecursionError: maximum recursion depth exceeded ...
Implementation details
I'll go step by step, first we will flatten a list, then we will output both the flattened list and the depth of all items, and finally we will output both the list and the corresponding item indexes in the "main list".
Flattening list
That being said, this is actually quite interesting as the iterative solution is perfectly designed for that, you can take a simple (non-recursive) list flattening algorithm:
def flatten(iterable):
return list(items_from(iterable))
def items_from(iterable):
cursor_stack = [iter(iterable)]
while cursor_stack:
sub_iterable = cursor_stack[-1]
try:
item = next(sub_iterable)
except StopIteration: # post-order
cursor_stack.pop()
continue
if isinstance(item, list): # pre-order
cursor_stack.append(iter(item))
else:
yield item # in-order
Computing depth
We can have access to the depth by looking at the stack size, depth = len(cursor_stack) - 1
else:
yield item, len(cursor_stack) - 1 # in-order
This will return an iterative on pairs (item, depth), if we need to separate this result in two iterators we can use the zip function:
>>> a = [1, 2, 3, [4 , [[5, 6], [7]]], [[[8, 9], [10]]], [[[11]]]]
>>> flatten(a)
[(1, 0), (2, 0), (3, 0), (4, 1), (5, 3), (6, 3), (7, 3), (8, 3), (9, 3), (10, 3), (11, 3)]
>>> flat_list, depths = zip(*flatten(a))
>>> print(flat_list)
(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11)
>>> print(depths)
(0, 0, 0, 1, 3, 3, 3, 3, 3, 3, 3)
We will now do something similar to have item indexes instead of the depth.
Computing item indexes
To instead compute item indexes (in the main list), you'll need to count the number of items you've seen so far, which can be done by adding 1 to an item_index every time we iterate over an item that is at depth 0 (when the stack size is equal to 1):
def flatten(iterable):
return list(items_from(iterable))
def items_from(iterable):
cursor_stack = [iter(iterable)]
item_index = -1
while cursor_stack:
sub_iterable = cursor_stack[-1]
try:
item = next(sub_iterable)
except StopIteration: # post-order
cursor_stack.pop()
continue
if len(cursor_stack) == 1: # If current item is in "main" list
item_index += 1
if isinstance(item, list): # pre-order
cursor_stack.append(iter(item))
else:
yield item, item_index # in-order
Similarly we will break pairs in two itératifs using ˋzip, we will also use ˋmap to transform both iterators to lists:
>>> a = [1, 2, 3, [4 , [[5, 6], [7]]], [[[8, 9], [10]]], [[[11]]]]
>>> flat_list, item_indexes = map(list, zip(*flatten(a)))
>>> print(flat_list)
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
>>> print(item_indexes)
[0, 1, 2, 3, 3, 3, 3, 4, 4, 4, 5]
improvement — Handling iterable inputs
Being able to take a broader palette of nested iterables as input could be desirable (especially if you build this for others to use). For example, the current implementation doesn't work as expected if we have nested iterables as input, for example:
>>> a = iter([1, '2', 3, iter([4, [[5, 6], [7]]])])
>>> flat_list, item_indexes = map(list, zip(*flatten(a)))
>>> print(flat_list)
[1, '2', 3, <list_iterator object at 0x100f6a390>]
>>> print(item_indexes)
[0, 1, 2, 3]
If we want this to work we need to be a bit careful because strings are iterable but we want them to be considered as atomic items (not a as lists of characters). Instead of assuming the input is a list as we did before:
if isinstance(item, list): # pre-order
cursor_stack.append(iter(item))
else:
yield item, item_index # in-order
We will not inspect the input type, instead we will try to use it as if it was an iterable and if it fails we will know that it’s not an iterable (duck typing):
if not isinstance(item, str):
try:
cursor_stack.append(iter(item))
continue
# item is not an iterable object:
except TypeError:
pass
yield item, item_index
With this implementation, we have:
>>> a = iter([1, 2, 3, iter([4, [[5, 6], [7]]])])
>>> flat_list, item_indexes = map(list, zip(*flatten(a)))
>>> print(flat_list)
[1, 2, 3, 4, 5, 6, 7]
>>> print(item_indexes)
[0, 1, 2, 3, 3, 3, 3]
Building test cases
If you need to generate tests cases with large depths, you can use this piece of code:
def build_deep_list(depth):
"""Returns a list of the form $l_{depth} = [depth-1, l_{depth-1}]$
with $depth > 1$ and $l_0 = [0]$.
"""
sub_list = [0]
for d in range(1, depth):
sub_list = [d, sub_list]
return sub_list
You can use this to make sure my implementation doesn't crash when the depth is large:
a = build_deep_list(1200)
flat_list, item_indexes = map(list, zip(*flatten(a)))
We can also check that we can't print such a list by using the str function:
>>> a = build_deep_list(1200)
>>> str(a)
RecursionError: maximum recursion depth exceeded while getting the repr of an object
Function repr is called by str(list) on every element from the input list.
Concluding remarks
In the end I agree that recursive implementations are way easier to read (as the call stack does half the hard work for us), but when implementing low level function like that I think it is a good investment to have a code that works in all cases (or at least all the cases you can think of). Especially when the solution is not that hard. That's also a way not to forget how to write non-recursive code working on tree-like structures (which may not happen a lot unless you are implementing data structures yourself, but that's a good exercise).
Note that everything I say “against” recursion is only true because python doesn't optimize call stack usage when facing recursion: Tail Recursion Elimination in Python. Whereas many compiled languages do Tail Call recursion Optimization (TCO). Which means that even if you write the perfect tail-recursive function in python, it will crash on deeply nested lists.
If you need more details on the list flattening algorithm you can refer to the post I linked.
Simple and elegant solution:
def flat(main_list):
res = []
index = []
for main_index in range(len(main_list)):
# Check if element is a String
if isinstance(main_list[main_index], str):
res.append(main_list[main_index])
index.append(main_index)
# Check if element is a List
else:
sub_list = str(main_list[main_index]).replace('[', '').replace(']', '').replace(" ", '').replace("'", '').split(',')
res += sub_list
index += ([main_index] * len(sub_list))
return res, index
this does the job, but if you want it to be just returned then I'll enhance it for you
from pprint import pprint
ar = ["","","",["",[["a","b"],["c"]]],[[["a","b"],["c"]]],[[["d"]]]]
flat = []
indices= []
def squash(arr,indx=-1):
for ind,item in enumerate(arr):
if isinstance(item, list):
squash(item,ind if indx==-1 else indx)
else:
flat.append(item)
indices.append(ind if indx==-1 else indx)
squash(ar)
pprint(ar)
pprint(flat)
pprint(indices)
EDIT
and this is if you don't want to keep the lists in memory and return them
from pprint import pprint
ar = ["","","",["",[["a","b"],["c"]]],[[["a","b"],["c"]]],[[["d"]]]]
def squash(arr,indx=-1,fl=[],indc=[]):
for ind,item in enumerate(arr):
if isinstance(item, list):
fl,indc = squash(item,ind if indx==-1 else indx, fl, indc)
else:
fl.append(item)
indc.append(ind if indx==-1 else indx)
return fl,indc
flat,indices = squash(ar)
pprint(ar)
pprint(flat)
pprint(indices)
I'm not expecting you would need more than 1k recursion depth which is the default setting

How to process different row in tensor based on the first column value in tensorflow

let's say I have a 4 by 3 tensor:
sample = [[10, 15, 25], [1, 2, 3], [4, 4, 10], [5, 9, 8]]
I would like to return another tensor of shape 4: [r1,r2,r3,r4] where r is either equal to tf.reduce_sum(row) if row[0] is less than 5, or r is equal to tf.reduce_mean(row) if row[0] is greater or equal to 5.
output:
output = [16.67, 6, 18, 7.33]
I'm not an adept to tensorflow, please do assist me on how to achieve the above in python 3 without a for loop.
thank you
UPDATES:
So I've tried to adapt the answer given by #Onyambu to include two samples in the functions but it gave me an error in all instances.
here is the answer for the first case:
def f(x):
c = tf.constant(5,tf.float32)
def fun1():
return tf.reduce_sum(x)
def fun2():
return tf.reduce_mean(x)
return tf.cond(tf.less(x[0],c),fun1,fun2)
a = tf.map_fn(f,tf.constant(sample,tf.float32))
The above works well.
The for two samples:
sample1 = [[10, 15, 25], [1, 2, 3], [4, 4, 10], [5, 9, 8]]
sample2 = [[0, 15, 25], [1, 2, 3], [0, 4, 10], [1, 9, 8]]
def f2(x1,x2):
c = tf.constant(1,tf.float32)
def fun1():
return tf.reduce_sum(x1[:,0] - x2[:,0])
def fun2():
return tf.reduce_mean(x1 - x2)
return tf.cond(tf.less(x2[0],c),fun1,fun2)
a = tf.map_fn(f2,tf.constant(sample1,tf.float32), tf.constant(sample2,tf.float32))
The adaptation does give errors, but the principle is simple:
calculate the tf.reduce_sum of sample1[:,0] - sample2[:,0] if row[0] is less than 1
calculate the tf.reduce_sum of sample1 - sample2 if row[0] is greater or equal to 1
Thank you for your assistance in advance!
import tensorflow as tf
def f(x):
y = tf.constant(5,tf.float32)
def fun1():
return tf.reduce_sum(x)
def fun2():
return tf.reduce_mean(x)
return tf.cond(tf.less(x[0],y),fun1,fun2)
a = tf.map_fn(f,tf.constant(sample,tf.float32))
with tf.Session() as sess: print(sess.run(a))
[16.666666 6. 18. 7.3333335]
If you want to shorten it:
y = tf.constant(5,tf.float32)
f=lambda x: tf.cond(tf.less(x[0], y), lambda: tf.reduce_sum(x),lambda: tf.reduce_mean(x))
a = tf.map_fn(f,tf.constant(sample,tf.float32))
with tf.Session() as sess: print(sess.run(a))

ThreadPoolExecutor with all params from two lists

I have two lists:
a = [1, 2, 3, 4]
b = [9, 8, 7, 6]
I'd like to have every combination of these two lists passed as argument to a function I'm multithreading:
def test(hello, world):
return hello + world
with ThreadPoolExecutor(max_workers=10) as executor:
future_to_stuff = { executor.submit(self._http_check_port, hello, world): ### }
for future in as_completed(future_to_port):
...
I'm trying to figure out how to "unpack" both my lists so that every combination of values in a and b are sent as params to the function.
I usually use following list comprehension.
future_to_stuff = [executor.submit(test, hello, world)
for hello, world in zip(a, b)]
Here is a modified code.
from concurrent.futures import ThreadPoolExecutor, as_completed
def test(hello, world):
return hello + world
def main(a, b):
with ThreadPoolExecutor(max_workers=10) as executor:
future_to_stuff = [executor.submit(test, hello, world)
for hello, world in zip(a, b)]
for future in as_completed(future_to_stuff):
print(future.result())
if __name__ == '__main__':
a = [1, 2, 3, 4]
b = [9, 8, 7, 6]
main(a, b)
Another way is to use .map method instead of .submit.
from concurrent.futures import ThreadPoolExecutor, as_completed
def test(hello, world):
return hello + world
def main(a, b):
with ThreadPoolExecutor(max_workers=10) as executor:
results = executor.map(test, a, b)
for result in results:
print(result)
if __name__ == '__main__':
a = [1, 2, 3, 4]
b = [9, 8, 7, 6]
main(a, b)

Resources