Load pure Python module from in-memory zipfile - python-3.x

From this, I was able to make this:
import os
import types
import zipfile
import sys
import io

class ZipImporter(object):
    def __init__(self, zip_file):
        self.zfile = zip_file
        self._paths = [x.filename for x in self.zfile.filelist]

    def _mod_to_paths(self, fullname):
        # get the python module name
        py_filename = fullname.replace(".", os.sep) + ".py"
        # get the filename if it is a package/subpackage
        py_package = fullname.replace(".", os.sep) + "/__init__.py"
        print(py_package)
        if py_filename in self._paths:
            return py_filename
        elif py_package in self._paths:
            return py_package
        else:
            return None

    def find_module(self, fullname, path):
        if self._mod_to_paths(fullname) is not None:
            return self
        return None

    def load_module(self, fullname):
        filename = self._mod_to_paths(fullname)
        if filename not in self._paths:
            raise ImportError(fullname)
        new_module = types.ModuleType(fullname)
        new_module.__name__ = fullname
        print(fullname)
        exec(self.zfile.open(filename, 'r').read(), new_module.__dict__)
        new_module.__file__ = filename
        new_module.__loader__ = self
        if filename.endswith("__init__.py"):
            new_module.__path__ = []
            new_module.__package__ = fullname
        else:
            new_module.__package__ = fullname.rpartition('.')[0]
        sys.modules[fullname] = new_module
        return new_module

# module_dict (defined elsewhere) maps archive paths to source strings
module_zip = zipfile.ZipFile(io.BytesIO(), "w")
for key in module_dict:
    module_zip.writestr(key, module_dict[key])
sys.meta_path.append(ZipImporter(module_zip))

import pyparsing
I'm using the source code of pyparsing as a test. However, it fails with ImportError: attempted relative import with no known parent package. Even if I replace all the relative imports with absolute imports, it fails with RecursionError: maximum recursion depth exceeded while calling a Python object, as it tries to import pyparsing repeatedly. Is there something fundamental I'm not understanding about the way Python's import system works?

I found the answer: PEP 302 says that:
Note that the module object must be in sys.modules before the loader executes the module code. This is crucial because the module code may (directly or indirectly) import itself; adding it to sys.modules beforehand prevents unbounded recursion in the worst case and multiple loading in the best.
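In other words, the loader above assigns sys.modules[fullname] only after exec() has run, so a module that imports itself (directly or via relative imports) never finds itself in sys.modules and is loaded again and again. A minimal sketch of a corrected load_module, with the rest of the class unchanged:

    def load_module(self, fullname):
        filename = self._mod_to_paths(fullname)
        if filename is None:
            raise ImportError(fullname)
        new_module = types.ModuleType(fullname)
        new_module.__file__ = filename
        new_module.__loader__ = self
        if filename.endswith("__init__.py"):
            new_module.__path__ = []
            new_module.__package__ = fullname
        else:
            new_module.__package__ = fullname.rpartition('.')[0]
        # Register the module *before* executing its code, as PEP 302 requires,
        # so recursive imports find the partially initialized module instead of
        # re-importing it (and relative imports see the right __package__).
        sys.modules[fullname] = new_module
        try:
            exec(self.zfile.open(filename, 'r').read(), new_module.__dict__)
        except BaseException:
            del sys.modules[fullname]  # clean up the half-loaded module
            raise
        return new_module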

Related

Not able to create mock object using mocker

I am trying to create a mock object for unit testing, but somehow the actual object is always called. Below is the code for reference:
utility_functions.py
import os
import json

def get_module_configurations(key):
    config = os.getcwd()
    config = config + "\\xyz.json"
    with open(config) as f:
        module_config = json.load(f)
    module_config = module_config[key]
    return module_config
load_class.py
from importlib import import_module
from inspect import isclass, isabstract

def load_module(data):
    package_path = data['Package']
    module_name = data['ModuleName']
    class_name = data['ClassName']
    try:
        module_name = str(module_name.split('.py')[0])
        module = import_module('.' + module_name, package_path)
    except Exception as error:
        pass
    try:
        _class = getattr(module, class_name)
    except Exception as error:
        pass
    if isclass(_class) and not isabstract(_class):
        return _class
    else:
        return None
function1.py
import load_class
from utility_functions import get_module_configurations

def load_helpers(task_name):
    module = get_module_configurations(task_name)
    cls = load_class.load_module(module)
    return cls
test_function.py
import pytest
from function1 import load_helpers

def test_mock(mocker):
    class_to_load = {"Package": "Test", "ModuleName": "default_class.py",
                     "ClassName": "DefaultClass"}
    mocker.patch('function1.load_helpers', return_value=class_to_load)
    result = load_helpers('c')
    assert result is not None
Since I am mocking, load_helpers should not be called, but the actual implementation always runs and fails because the path is invalid.
I am missing something basic but cannot figure out what.
Any help would be great.
If you are importing the function into your module (from function1 import load_helpers), you need to patch it as if it were part of that module. This means that instead of...
mocker.patch('function1.load_helpers', return_value=class_to_load)
...you should use...
mocker.patch('test_function.load_helpers', return_value=class_to_load)
PS: I assume that you are just practicing mocking because otherwise your test function doesn't make sense.
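For completeness, a minimal sketch of the corrected test file, assuming the mocker fixture comes from pytest-mock as in the question:

import pytest
from function1 import load_helpers

def test_mock(mocker):
    class_to_load = {"Package": "Test", "ModuleName": "default_class.py",
                     "ClassName": "DefaultClass"}
    # Patch the name this module actually looks up: load_helpers was imported
    # into test_function's namespace, so that is where it must be replaced.
    mocker.patch('test_function.load_helpers', return_value=class_to_load)
    result = load_helpers('c')
    assert result is not None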

Is it possible to make a module available as an import from another module?

I'm refactoring some code and have moved some files around. But for backwards compatibility, I would like all of my modules to keep their old import paths.
My file structure is as follows:
--| calcs/
----| __init__.py
----| new_dir
------| new_file1.py
------| new_file2.py
What do I need to do to ensure that I can use an import like
import calcs.new_file1.foo
# OR
from calcs.new_file1 import foo
I have tried a few methods of adding the imports to the top-level __init__.py file, as is recommended here.
But while this seems to allow an import such as import calcs.new_file1, an import such as import calcs.new_file1.foo raises ModuleNotFoundError: No module named 'calcs.new_file1'.
I expect that I need Python to recognize calcs.new_file1 as a module. At the moment it seems to just be importing it as a class or some other kind of object.
The only way I know how to do it is by creating a custom import hook.
Here is the PEP for more information.
If you need some help on how to implement one, I'd suggest you take a look at the six module,
here
and here
Basically, your calcs/__init__.py will become something like this:
''' calcs/__init__.py '''
from .new_dir import new_file1, new_file2
import sys

__path__ = []
__package__ = __name__

class CalcsImporter:
    def __init__(self, exported_mods):
        self.exported_mods = {
            f'{__name__}.{key}': value for key, value in exported_mods.items()
        }

    def find_module(self, fullname, path=None):
        if fullname in self.exported_mods:
            return self
        return None

    def load_module(self, fullname):
        try:
            return sys.modules[fullname]
        except KeyError:
            pass
        try:
            mod = self.exported_mods[fullname]
        except KeyError:
            raise ImportError('Unable to load %r' % fullname)
        mod.__loader__ = self
        sys.modules[fullname] = mod
        return mod

_importer = CalcsImporter({
    'new_file1': new_file1,
    'new_file2': new_file2,
})

sys.meta_path.append(_importer)
and you should be able to do from calcs.new_file1 import foo
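A quick check of the result, assuming new_file1.py defines a function foo (a hypothetical name taken from the question):

# the real location still works
from calcs.new_dir.new_file1 import foo
# and so does the backwards-compatible alias served by the meta path hook
from calcs.new_file1 import foo
import calcs.new_file1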

Using multiprocessing.Pool in Python with a function returning custom object

I am using multiprocessing.Pool to speed up computation, as I call one function multiple times, and then collate the result. Here is a snippet of my code:
import multiprocessing
from functools import partial

def Foo(id: int, constant_arg1: str, constant_arg2: str):
    custom_class_obj = CustomClass(constant_arg1, constant_arg2)
    custom_class_obj.run()  # this changes some attributes of the custom_class_obj
    if something:
        return None
    else:
        return [custom_class_obj]

def parallel_run(iters: int, a: str, b: str):
    pool = multiprocessing.Pool(processes=k)  # k: worker count, defined elsewhere
    ## create the partial function obj before passing it to pool
    partial_func = partial(Foo, constant_arg1=a, constant_arg2=b)
    ## create the variable id list
    iter_list = list(range(iters))
    all_runs = pool.map(partial_func, iter_list)
    return all_runs
This throws the following error in the multiprocessing module:
multiprocessing.pool.MaybeEncodingError: Error sending result: '[[<CustomClass object at 0x1693c7070>], [<CustomClass object at 0x1693b88e0>], ....]'
Reason: 'TypeError("cannot pickle 'module' object")'
How can I resolve this?
I was able to replicate the error message with a minimal example of an un-picklable class. The error states that an instance of your class can't be pickled because it contains a reference to a module, and modules are not picklable. You need to comb through CustomClass to make sure instances don't hold things like open file handles, module references, etc. If you do need those things, you should use __getstate__ and __setstate__ to customize the pickle and unpickle process.
A distilled example of your error:
from multiprocessing import Pool
from functools import partial

class klass:
    def __init__(self, a):
        self.value = a
        import os
        self.module = os  # this fails: can't pickle a module and send it back to the main process

def foo(a, b, c):
    return klass(a + b + c)

if __name__ == "__main__":
    with Pool() as p:
        a = 1
        b = 2
        bar = partial(foo, a, b)
        res = p.map(bar, range(10))
        print([r.value for r in res])
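And a minimal sketch of the __getstate__/__setstate__ approach applied to the same distilled class: drop the unpicklable module reference when pickling and re-create it when unpickling:

from multiprocessing import Pool
from functools import partial

class klass:
    def __init__(self, a):
        self.value = a
        import os
        self.module = os

    def __getstate__(self):
        # copy the instance dict and drop the unpicklable module reference
        state = self.__dict__.copy()
        del state['module']
        return state

    def __setstate__(self, state):
        self.__dict__.update(state)
        import os
        self.module = os  # rebuild the reference on the receiving side

def foo(a, b, c):
    return klass(a + b + c)

if __name__ == "__main__":
    with Pool() as p:
        bar = partial(foo, 1, 2)
        res = p.map(bar, range(10))
        print([r.value for r in res])  # instances now round-trip cleanly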

How to import static from a class

I have previously used importlib to dynamically import a store class, like this:
import importlib

def load_store(store: str):
    """
    Imports the correct module for the given store and returns an
    instance of its store class.
    """
    mod = importlib.import_module(f"lib.vendors.{store}")
    class_pointer = getattr(mod, store)()
    return class_pointer
However, the problem I have seen is that importlib gets called 602 times, even though the code below only calls the function once.
from lib.scraper import product_data
from lib.utils import load_store

# To test specific store and link
store: str = "footlockerse"
link: str = "https://www.footlocker.se/en/product/BarelyGreen-Black-White/316700362904"

# -------------------------------------------------------------------------
# Utils
# -------------------------------------------------------------------------
store_class = load_store(store=store)  # <--- Calls it only once

def main():
    product_data(store_class=store_class,
                 store=store, link=link, params="product_page")

if __name__ == '__main__':
    main()
I later tested making the import static and the issue went away. However, I have around 46 imports to implement, and I wonder if I could import only the module needed for a given "store" value. For example, if I am given footlockerse, then we import only footlockerse. Is that possible?
e.g.
test = "footlockerse"
from lib.vendors.test import test
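A static from ... import statement cannot take its module name from a variable, but importlib.import_module can, which is what load_store above already does. A minimal sketch with caching, assuming each lib.vendors.<store> module defines a class named after the store (as in the question); the cache ensures each store module is imported and instantiated at most once:

import importlib
from functools import lru_cache

@lru_cache(maxsize=None)
def load_store(store: str):
    """Import lib.vendors.<store> on first use and cache the instance."""
    mod = importlib.import_module(f"lib.vendors.{store}")
    return getattr(mod, store)()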

How to decorate an asyncio.coroutine to retain its __name__?

I've tried to write a decorator function which wraps an asyncio.coroutine and returns the time it took to get done. The recipe below contains the code, which works as I expected. My only problem with it is that somehow I lose the name of the decorated function despite the use of @functools.wraps. How can I retain the name of the original coroutine? I checked the source of asyncio.
import asyncio
import functools
import random
import time

MULTIPLIER = 5

def time_resulted(coro):
    @functools.wraps(coro)
    @asyncio.coroutine
    def wrapper(*args, **kargs):
        time_before = time.time()
        result = yield from coro(*args, **kargs)
        if result is not None:
            raise TypeError('time resulted coroutine can '
                            'only return None')
        return time_before, time.time()
    print('= wrapper.__name__: {!r} ='.format(wrapper.__name__))
    return wrapper

@time_resulted
@asyncio.coroutine
def random_sleep():
    sleep_time = random.random() * MULTIPLIER
    print('{} -> {}'.format(time.time(), sleep_time))
    yield from asyncio.sleep(sleep_time)

if __name__ == '__main__':
    loop = asyncio.get_event_loop()
    tasks = [asyncio.Task(random_sleep()) for i in range(5)]
    loop.run_until_complete(asyncio.wait(tasks))
    loop.close()
    for task in tasks:
        print(task, task.result()[1] - task.result()[0])
    print('= random_sleep.__name__: {!r} ='.format(
        random_sleep.__name__))
    print('= random_sleep().__name__: {!r} ='.format(
        random_sleep().__name__))
The result:
= wrapper.__name__: 'random_sleep' =
1397226479.00875 -> 4.261069174838891
1397226479.00875 -> 0.6596335046471768
1397226479.00875 -> 3.83421163259601
1397226479.00875 -> 2.5514027672929713
1397226479.00875 -> 4.497471439365472
Task(<wrapper>)<result=(1397226479.00875, 1397226483.274884)> 4.266134023666382
Task(<wrapper>)<result=(1397226479.00875, 1397226479.6697)> 0.6609499454498291
Task(<wrapper>)<result=(1397226479.00875, 1397226482.844265)> 3.835515022277832
Task(<wrapper>)<result=(1397226479.00875, 1397226481.562422)> 2.5536720752716064
Task(<wrapper>)<result=(1397226479.00875, 1397226483.51523)> 4.506479978561401
= random_sleep.__name__: 'random_sleep' =
= random_sleep().__name__: 'wrapper' =
As you can see, random_sleep() returns a generator object with a different name. I would like to retain the name of the decorated coroutine. I am not aware whether this problem is specific to asyncio coroutines or not. I also tried the code with different decorator orders, but all had the same result. If I comment out @functools.wraps(coro), then even random_sleep.__name__ becomes 'wrapper', as I expected.
EDIT: I've posted this issue to the Python Issue Tracker and received the following answer from R. David Murray: "I think this is a specific case of a more general need to improve 'wraps' that was discussed on python-dev not too long ago."
The issue is that functools.wraps changes only wrapper.__name__, while wrapper().__name__ stays 'wrapper': __name__ is a read-only generator attribute. You could use exec to set the appropriate name:
import asyncio
import functools
import uuid
from textwrap import dedent

def wrap_coroutine(coro, name_prefix='__' + uuid.uuid4().hex):
    """Like functools.wraps but preserves coroutine names."""
    # attribute __name__ is not writable for a generator, set it dynamically
    namespace = {
        # use name_prefix to avoid an accidental name conflict
        name_prefix + 'coro': coro,
        name_prefix + 'functools': functools,
        name_prefix + 'asyncio': asyncio,
    }
    exec(dedent('''
        def {0}decorator({0}wrapper_coro):
            @{0}functools.wraps({0}coro)
            @{0}asyncio.coroutine
            def {wrapper_name}(*{0}args, **{0}kwargs):
                {0}result = yield from {0}wrapper_coro(*{0}args, **{0}kwargs)
                return {0}result
            return {wrapper_name}
    ''').format(name_prefix, wrapper_name=coro.__name__), namespace)
    return namespace[name_prefix + 'decorator']
Usage:
def time_resulted(coro):
    @wrap_coroutine(coro)
    def wrapper(*args, **kargs):
        # ...
    return wrapper
It works but there is probably a better way than using exec().
In the time since this question was asked, it became possible to change the name of a coroutine. It is done by setting __qualname__ (not __name__):
async def my_coro(): pass
c = my_coro()
print(repr(c))
# <coroutine object my_coro at 0x7ff8a7d52bc0>
c.__qualname__ = 'flimflam'
print(repr(c))
# <coroutine object flimflam at 0x7ff8a7d52bc0>
import asyncio
print(repr(asyncio.ensure_future(c)))
# <Task pending name='Task-737' coro=<flimflam() running at <ipython-input>:1>>
The usage of __qualname__ in a coroutine object's __repr__ is defined in the CPython source.
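As a side note: on Python 3.5+ the same decorator can be written with async def, and functools.wraps alone preserves the name, because coroutine and generator objects now take their __name__/__qualname__ from the function object rather than the code object. A minimal sketch of that variant (not the original recipe):

import asyncio
import functools
import time

def time_resulted(coro):
    @functools.wraps(coro)  # copies __name__ and __qualname__ onto wrapper
    async def wrapper(*args, **kwargs):
        time_before = time.time()
        result = await coro(*args, **kwargs)
        if result is not None:
            raise TypeError('time resulted coroutine can only return None')
        return time_before, time.time()
    return wrapper

@time_resulted
async def random_sleep():
    await asyncio.sleep(0.1)

if __name__ == '__main__':
    c = random_sleep()
    print(c.__name__)  # 'random_sleep', not 'wrapper'
    start, end = asyncio.run(c)
    print(end - start)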
