Loop over Set of Functions for Variable Pipelining and Separate Usage

Loop over Set of Functions for Variable Pipelining and Separate Usage - python-3.x

I have the following 10 functions:
def function1(data1,data2):
...
return value
def function2(data1,data2):
...
return value
...
def function10(data1,data2):
...
return value
I want to use these functions separately when needed but also
in a pipeline for calculating properties and appending to a list.
Like this:
collecting_list = []
for idx in range(10):
collecting_list.append(function1(data1[idx],data2[idx]))
collecting_list.append(function2(data1[idx],data2[idx]))
collecting_list.append(function3(data1[idx],data2[idx]))
collecting_list.append(function4(data1[idx],data2[idx]))
collecting_list.append(function5(data1[idx],data2[idx]))
collecting_list.append(function6(data1[idx],data2[idx]))
collecting_list.append(function7(data1[idx],data2[idx]))
collecting_list.append(function8(data1[idx],data2[idx]))
collecting_list.append(function9(data1[idx],data2[idx]))
collecting_list.append(function10(data1[idx],data2[idx])
Obviously I would need some property to loop over function names, but I never came across this problem before and was just wondering if I can call those functions in a loop without hard coding this and just adjusting the function-number (e.g. function1(), function2(), ... function10()).
Hints and ideas appreciated!

use lambda and exec.
you could have a string array of the function names, and lambda functions that return the data like something below. With lambda functions, you can reuse the same name dataX over and over again and with proper implementation get the right data needed. See below for a very basic, abstract example:
import random
def getData1():
return random.randint(1, 10)
def getData2():
return random.randint(11, 20)
def function1(data1):
print("f1, {}".format(data1))
def function2(data1, data2):
print("f2, {} and {}".format(data1, data2))
data1 = lambda: getData1() # these can be any function that serves as the
data2 = lambda: getData2() # source for your data. using lambda allows for
# anonymization and reuse
functionList = ["function1({})".format(data1()), "function2({},{})".format(data1(), data2())]
for f in functionList:
exec(f)
function1(data1())
You might ask why not just use getData1() in the function list instead of data1, and the answer has to do with parameters. If the getDataX functions required parameters, you wouldn't want to compute the functionList every time a parameter name changed. This is one of the benefits of using lambda and exec.

Um, sure?
import sys
import types
module_name = sys.modules[__name__]
def function1(data1, data2):
return ("func1", data1 + data2)
def function2(data1, data2):
return ("func2", data1 + data2)
def function3(data1, data2):
return ("func3", data1 + data2)
def function4(data1, data2):
return ("func4", data1 + data2)
def function5(data1, data2):
return ("func5", data1 + data2)
def get_functions():
func_list = list()
for k in sorted(module_name.__dict__.keys()):
if k.startswith('function'):
if isinstance(module_name.__dict__[k], types.FunctionType):
func_list.append(module_name.__dict__[k])
return func_list
def get_functions_2():
func_list = list()
for itr in range(1, 100):
try:
func_list.append(getattr(module_name, "function%s" % itr))
except:
break
return func_list
def run_pipeline(function_list):
collecting_list = list()
for idx, func in enumerate(function_list):
collecting_list.append(func(idx, idx))
return collecting_list
if __name__ == "__main__":
funcs = get_functions()
results = run_pipeline(funcs)
print(results)
Outputs:
[('func1', 0), ('func2', 2), ('func3', 4), ('func4', 6), ('func5', 8)]
Note: I probably wouldn't do it this way if I was trying to construct dynamic computational pipelines, but you can use this method. You could in theory create a file per pipeline and name them in order to use this method though?
Edit: Added get_functions_2 per request

Related

Is the diction get() function implemented in micropython?

I was wondering if the diction get() function is implemented in micropython, or is there a way to use a dictionary to access multiple functions
#Below takes input from command line
Input_actual = sys.argv[2]
def fxn1(X):
X **2
def fxn2(X):
X**3
def fxn3(X):
X**4
outcomes = {input1: fxn1, input2: fxn2}
Executor= outcomes.get(Input_actual, fxn3)
Executor(Input_actual)

Is there a way to extract function parameters from a dictionary?

I am trying to create some more functional code in Python and I want to know if it is possible to transform dictionary (key,values) to pass as a function parameter.
I am currently doing this in a more imperative way, where I filter and then manually extract each key depending on the result of the filter. My current code:
def a(i: int, config: dict):
function_array = [function1, function2, function3]
selected = function_array[i]
if (i == "0"):
result = selected(x = config['x'])
elif (i == "1"):
result = selected(y = config['y'])
elif (i == "2"):
result = selected(z = config['z'])
return result
The current result is correct, but when I have many cases, I need to hardcode each parameter for the specified function. So, that is why I want to know if it is possible to pass the config object as I want (with an x when i is 0, for example) and then just do something like this:
def a(i: int, config: dict):
function_array = [function1, function2, function3]
result = function_array[i](config)
return result

The syntax for passing items from a dictionary as function parameters is simply selected(**config)
So for your example, it would look something like this:
def function1(x=0):
return x + 1
def function2(y=42):
return y * 2
def function3(z=100):
return z
def a(i, config):
function_array = [function1, function2, function3]
selected = function_array[i]
return selected(**config)
config = {x: 10}
a(0, config) # calls function1(x=10)
config = {y: 20}
a(1, config) # calls function2(y=20)
config = {}
a(2, config) # calls function3()

Every python function can be instructed to take a dictionary of keywords. See e.g. https://www.pythoncheatsheet.org/blog/python-easy-args-kwargs . (Official source at https://docs.python.org/3/reference/compound_stmts.html#function-definitions, but it's harder to read.)
You could do:
def a(i: int, keyword: str, **kwargs: dict):
if keyword in kwargs:
result = kwargs[keyword](i)
and you would run it with something like:
a(5, "func3", func1=print, func2=sum, func3=all)
Or, you could just pass a dictionary itself into the function:
def a(i: int, keyword: str, config: dict)
if keyword in config:
result = config[keyword](i)
This would be run with something like:
a(5, "func3", {"func1": print, "func2": sum, "func3": all})
The only difference is that the ** in the function declaration tells python to automatically make a dictionary out of explicit keywords. In the second example, you make the dictionary by yourself.
There's an important thing happening behind the scenes here. Functions are being passed around just like anything else. In python, functions are still objects. You can pass a function just as easily as you can pass an int. So if you wanted to have a list of lists, where each inner list is a function with some arguments, you easily could:
things_to_do = [[sum, 5, 7, 9], [any, 1, 0], [all, 1, 0]]
for thing_list in things_to_do:
function = thing_list[0]
args = thing_list[1:]
print(function(args))
And you'll get the following results:
21
True
False
(Note also that all of those functions take an iterable, such as a list. If you want to pass each argument separately, you would use *args instead of args.)
You can do it with defined functions, too. If you have
def foo1(arg1):
pass
def foo2(arg1, arg2):
pass
you can just as easily have
things_to_do = [[sum, 5, 7, 9], [foo1, 'a'], [foo2, 0, None]]

Most "pythonic" way of populating a nested indexed list from a flat list

I have a situation where I am generating a number of template nested lists with n organised elements where each number in the template corresponds to the index from a flat list of n values:
S =[[[2,4],[0,3]], [[1,5],[6,7]],[[10,9],[8,11],[13,12]]]
For each of these templates, the values inside them correspond to the index value from a flat list like so:
A = ["a","b","c","d","e","f","g","h","i","j","k","l","m","n"]
to get;
B = [[["c","e"],["a","d"]], [["b","f"],["g","h"]],[["k","j"],["i","l"],["n","m"]]]
How can I populate the structure S with the values from list A to get B, considering that:
- the values of list A can change in value but not in a number
- the template can have any depth of nested structure of but will only use an index from A once as the example shown above.
I did this with the very ugly append unflatten function below that works if the depth of the template is not more then 3 levels. Is there a better way of accomplishing it using generators, yield so it works for any arbitrary depth of template.
Another solution I thought but couldn't implement is to set the template as a string with generated variables and then assigning the variables with new values using eval()
def unflatten(item, template):
# works up to 3 levels of nested lists
tree = []
for el in template:
if isinstance(el, collections.Iterable) and not isinstance(el, str):
tree.append([])
for j, el2 in enumerate(el):
if isinstance(el2, collections.Iterable) and not isinstance(el2, str):
tree[-1].append([])
for k, el3 in enumerate(el2):
if isinstance(el3, collections.Iterable) and not isinstance(el3, str):
tree[-1][-1].append([])
else:
tree[-1][-1].append(item[el3])
else:
tree[-1].append(item[el2])
else:
tree.append(item[el])
return tree
I need a better solution that can be employed to accomplish this when doing the above recursively and for n = 100's of organised elements.
UPDATE 1
The timing function I am using is this one:
def timethis(func):
'''
Decorator that reports the execution time.
'''
#wraps(func)
def wrapper(*args, **kwargs):
start = time.time()
result = func(*args, **kwargs)
end = time.time()
print(func.__name__, end-start)
return result
return wrapper
and I am wrapping the function suggested by #DocDrivin inside another to call it with a one-liner. Below it is my ugly append function.
#timethis
def unflatten(A, S):
for i in range(100000):
# making sure that you don't modify S
rebuilt_list = copy.deepcopy(S)
# create the mapping dict
adict = {key: val for key, val in enumerate(A)}
# the recursive worker function
def worker(alist):
for idx, entry in enumerate(alist):
if isinstance(entry, list):
worker(entry)
else:
# might be a good idea to catch key errors here
alist[idx] = adict[entry]
#build list
worker(rebuilt_list)
return rebuilt_list
#timethis
def unflatten2(A, S):
for i in range (100000):
#up to level 3
temp_tree = []
for i, el in enumerate(S):
if isinstance(el, collections.Iterable) and not isinstance(el, str):
temp_tree.append([])
for j, el2 in enumerate(el):
if isinstance(el2, collections.Iterable) and not isinstance(el2, str):
temp_tree[-1].append([])
for k, el3 in enumerate(el2):
if isinstance(el3, collections.Iterable) and not isinstance(el3, str):
temp_tree[-1][-1].append([])
else:
temp_tree[-1][-1].append(A[el3])
else:
temp_tree[-1].append(A[el2])
else:
temp_tree.append(A[el])
return temp_tree
The recursive method is much better syntax, however, it is considerably slower then using the append method.

You can do this by using recursion:
import copy
S =[[[2,4],[0,3]], [[1,5],[6,7]],[[10,9],[8,11],[13,12]]]
A = ["a","b","c","d","e","f","g","h","i","j","k","l","m","n"]
# making sure that you don't modify S
B = copy.deepcopy(S)
# create the mapping dict
adict = {key: val for key, val in enumerate(A)}
# the recursive worker function
def worker(alist):
for idx, entry in enumerate(alist):
if isinstance(entry, list):
worker(entry)
else:
# might be a good idea to catch key errors here
alist[idx] = adict[entry]
worker(B)
print(B)
This yields the following output for B:
[[['c', 'e'], ['a', 'd']], [['b', 'f'], ['g', 'h']], [['k', 'j'], ['i', 'l'], ['n', 'm']]]
I did not check if the list entry can actually be mapped with the dict, so you might want to add a check (marked the spot in the code).
Small edit: just saw that your desired output (probably) has a typo. Index 3 maps to "d", not to "c". You might want to edit that.
Big edit: To prove that my proposal is not as catastrophic as it seems at a first glance, I decided to include some code to test its runtime. Check this out:
import timeit
setup1 = '''
import copy
S =[[[2,4],[0,3]], [[1,5],[6,7]],[[10,9],[8,11],[13,12]]]
A = ["a","b","c","d","e","f","g","h","i","j","k","l","m","n"]
adict = {key: val for key, val in enumerate(A)}
# the recursive worker function
def worker(olist):
alist = copy.deepcopy(olist)
for idx, entry in enumerate(alist):
if isinstance(entry, list):
worker(entry)
else:
alist[idx] = adict[entry]
return alist
'''
code1 = '''
worker(S)
'''
setup2 = '''
import collections
S =[[[2,4],[0,3]], [[1,5],[6,7]],[[10,9],[8,11],[13,12]]]
A = ["a","b","c","d","e","f","g","h","i","j","k","l","m","n"]
def unflatten2(A, S):
#up to level 3
temp_tree = []
for i, el in enumerate(S):
if isinstance(el, collections.Iterable) and not isinstance(el, str):
temp_tree.append([])
for j, el2 in enumerate(el):
if isinstance(el2, collections.Iterable) and not isinstance(el2, str):
temp_tree[-1].append([])
for k, el3 in enumerate(el2):
if isinstance(el3, collections.Iterable) and not isinstance(el3, str):
temp_tree[-1][-1].append([])
else:
temp_tree[-1][-1].append(A[el3])
else:
temp_tree[-1].append(A[el2])
else:
temp_tree.append(A[el])
return temp_tree
'''
code2 = '''
unflatten2(A, S)
'''
print(f'Recursive func: { [i/10000 for i in timeit.repeat(setup = setup1, stmt = code1, repeat = 3, number = 10000)] }')
print(f'Original func: { [i/10000 for i in timeit.repeat(setup = setup2, stmt = code2, repeat = 3, number = 10000)] }')
I am using the timeit module to do my tests. When running this snippet, you will get an output similar to this:
Recursive func: [8.74395573977381e-05, 7.868373290111777e-05, 7.9051584698027e-05]
Original func: [3.548609419958666e-05, 3.537480780214537e-05, 3.501355930056888e-05]
These are the average times of 10000 iterations, and I decided to run it 3 times to show the fluctuation. As you can see, my function in this particular case is 2.22 to 2.50 times slower than the original, but still acceptable. The slowdown is probably due to using deepcopy.
Your test has some flaws, e.g. you redefine the mapping dict at every iteration. You wouldn't do that normally, instead you would give it as a param to the function after defining it once.

You can use generators with recursion
A = ["a","b","c","d","e","f","g","h","i","j","k","l","m","n"]
S = [[[2,4],[0,3]], [[1,5],[6,7]],[[10,9],[8,11],[13,12]]]
A = {k: v for k, v in enumerate(A)}
def worker(alist):
for e in alist:
if isinstance(e, list):
yield list(worker(e))
else:
yield A[e]
def do(alist):
return list(worker(alist))
This is also a recursive approach, just avoiding individual item assignment and letting list do the work by reading the values "hot off the CPU" from your generator. If you want, you can Try it online!-- setup1 and setup2 copied from #DocDriven 's answer (but I recommend you don't exaggerate with the numbers, do it locally if you want to play around).
Here are example time numbers:
My result: [0.11194685893133283, 0.11086182110011578, 0.11299032904207706]
result1: [1.0810202199500054, 1.046933784848079, 0.9381260159425437]
result2: [0.23467918601818383, 0.236218704842031, 0.22498539905063808]

How to have one method reference another method from within the same class?

I am trying to better understand classes. Suppose I have a class that handles data modifications, such as copying data and resampling data. In my case, the function resample requires the function duplicate since I want to modify a copy of the original data rather than the original data itself.
I've seen a similar question addressed here, though I was unable to apply those solutions to my case. I'm wondering if things like #classmethod are required because of posts like this.
import numpy as np
import random
class DataMods():
def __init__(self, data):
self.data = data
def duplicate(self):
if isinstance(self.data, np.ndarray):
res = np.copy(self.data)
else:
res = self.data[:]
return res
def resample(self, nshuffles=0, struct=np.array):
# resample copy of data instead of data
data_dup = self.duplicate(self.data) # this is the problematic line
size = len(data_dup)
if isinstance(nshuffles, int):
if nshuffles > 1:
for idx in range(nshuffles - 1):
res = struct(random.sample(list(data_dup), size))
else:
raise ValueError("nshuffles ...")
return struct(random.sample(list(res), size))
aa = np.array([1, 2, 3, 4, 5])
a2 = DataMods(data=aa).resample(nshuffles=5)
print(a2)
>> TypeError: duplicate() takes 1 positional argument but 2 were given
If I change the problematic line to:
data_dup = self.duplicate(data)
Then I get a different error:
NameError: name 'data' is not defined
I've tried a few different variations, but all were unsuccessful. What am I not understanding correctly?

pytest test needs parametrization at collection phase and at setup time

I have some tests which I'd like to parametrize using some arguments which need the parametrization to happen during collection phase and some which need it to happen at setup time. I'm unable to use
metafunc.parametrize in a pytest_generate_test hook since I need some fixtures to have indirect=True to pass the argname as a request.param, but the other arguments need to have indirect=False.
Any ideas how to do this?
Here's an example of what my tests look like and what I want to do:
def pytest_generate_tests(metafunc):
if metafunc.function.__name__ == 'test_example':
argnames = []
argvalues = []
parameters = getattr(metafunc.function, 'paramlist', ())
for p in parameters:
if type(p) == list:
argnames = tuple(['myfixture'] + p)
else:
argvalues.append = tuple(['std'] + p['argvalues'])
argvalues.append = tuple(['pro'] + p['argvalues'])
# I want to do the following, but it won't work since some of the
# args need indirect set to true and some need indirect set to false.
metafunc.parametrize(argnames, argvalues, indirect=True)
elif 'myfixture' in metafunc.fixturenames:
# we have existing tests which use the fixture, but only with
standard
metafunc.parametrize("myfixture", "std")
else:
# we have existing tests which use older style parametrization,
non-fixture
for p in getattr(metafunc.function, 'paramlist', ()):
metafunc.addcall(funcargs=p)
def params(decolist):
def wrapper(function):
function.paramlist = decolist
return function
return wrapper
#pytest.fixture
def myfixture(request):
If request.param == 'std':
myfix = SomeObject()
elif request.param == 'pro':
myfix = SomeOtherObject()
def fin():
myfix.close()
request.addfinalizer(fin)
return myfix
#params([
['color', 'type'],
{ 'argvalues': [ 'blue', 'cat'] },
{ 'argvalues': ['pink', 'dog'] }
])
def test_example(myfixture, color, type):
# this is the new test we want to add
def test_something(myfixture):
# existing test which only uses std fixture
#params([
{'arg1': 1, 'arg2': 2},
{'arg1': 3, 'arg2': 5}
])
def test_old_style(arg1, arg2):
# existing tests which don't use fixtures
Thanks for reading through this! I know it's rather long.

per design all parametization happens at collection time

Develop Reference

node.js excel linux python-3.x azure haskell apache-spark rust .htaccess string

Loop over Set of Functions for Variable Pipelining and Separate Usage - python-3.x

Related

Is the diction get() function implemented in micropython?

Is there a way to extract function parameters from a dictionary?

Most "pythonic" way of populating a nested indexed list from a flat list

How to have one method reference another method from within the same class?

pytest test needs parametrization at collection phase and at setup time

Categories

Resources