Case-insensitive getattr - python-3.x

Given a module I want to be able to search for classes in that module with case insensitivity.
For example if I have the following module utils/helpers.py
class UtilityClass:
def __init__(self):
...
In a different script, I want to be able to retrieve the class by its name in a case insensitive way
import utils.helpers as util_helpers
module = getattr(util_helpers, 'utilityclass')
What is the right and most Pythonic way to implement this?

You can override builtins.getattr with a case-insensitive version:
import builtins
import pprint
def igetattr(obj, attr):
for a in dir(obj):
if a.lower() == attr.lower():
return orig_getattr(obj, a)
orig_getattr = builtins.getattr
builtins.getattr = igetattr
print(getattr(pprint, 'prettyprinter'))
This outputs: <class 'pprint.PrettyPrinter'>

Related

Python create dynamic class and set multi bases from imported module

I found several example here, but is not what exactly looking for, will try to explain here
from this answer tried to achieve my result but is not what looking for
How can I dynamically create derived classes from a base class
i have a module that holds many classes
importing the module
import importlib
# are awalable many classes here
forms = importlib.import_module('my_forms')
Now, based on forms i need to create a new class and add bases to my new class all classes that are availabe in forms
this what i tried, but can not find a way to assign the bases
import inspect
def create_DynamicClass():
class DynamicClass(BaseClass):
pass
for form_name, class_name in inspect.getmembers(forms):
for i in class_name():
# here the code to added all bases to DynamicClass
return DynamicClass()
example how my_forms module looks
class MyClass1(BaseClass):
attr1 = 1
attr2 = 2
#coroutine
def prepare(self):
# some code for each class
class MyClass2(BaseClass):
attr3 = 3
attr4 = 4
#coroutine
def prepare(self):
# some code for each class
class MyClass3(BaseClass):
attr5 = 5
attr6 = 6
#coroutine
def prepare(self):
# some code for each class
The result that i want to achieve is the following, will make a static class to show desired result but need to be dynamic
I need to create my class dynamic because in my_forms module can be any amount of classes
# inherits all classes from my_forms module
class MyResultClass(MyClass1, MyClass2, MyClass3):
# here get all available attributes from all classes
#coroutine
def prepare(self):
# as well need each prepare function for each class as well
yield MyClass1().prepare()
yield MyClass2().prepare()
yield MyClass3().prepare()
Simply declare the dynamic class with all of your base classes. To do so, put all of your base classes in a list, and unpack the list in the class definition statement with the * operator like this:
def createClass(baseClasess):
class NewClass(*baseClasses):
pass
return NewClass
DynamicClass = createClass([class1, class2, ...])
i have managed to find a solution, will post here, if any recommendation to make it better will appreciate
forms = importlib.import_module('my_forms')
class Form(BaseForm):
#coroutine
def prepare(self):
for form_name, class_name in inspect.getmembers(forms, inspect.isclass):
try:
yield class_name().prepare()
except TypeError:
continue
def createClass(meta):
for form_name, class_name in inspect.getmembers(forms, inspect.isclass):
try:
Form.__bases__ += (class_name, )
for field in class_name():
field_type = fl.SelectField() if hasattr(field, 'choices') else fl.StringField()
setattr(Form, field.name, field_type)
except TypeError:
continue
return Form(meta=meta)

python: multiple functions or abstract classes when dealing with data flow requirement

I have more of a design question, but I am not sure how to handle that. I have a script preprocessing.py where I read a .csv file of text column that I would like to preprocess by removing punctuations, characters, ...etc.
What I have done now is that I have written a class with several functions as follows:
class Preprocessing(object):
def __init__(self, file):
self.my_data = pd.read_csv(file)
def remove_punctuation(self):
self.my_data['text'] = self.my_data['text'].str.replace('#','')
def remove_hyphen(self):
self.my_data['text'] = self.my_data['text'].str.replace('-','')
def remove_words(self):
self.my_data['text'] = self.my_data['text'].str.replace('reference','')
def save_data(self):
self.my_data.to_csv('my_data.csv')
def preprocessing(file_my):
f = Preprocessing(file_my)
f.remove_punctuation()
f.remove_hyphen()
f.remove_words()
f.save_data()
return f
if __name__ == '__main__':
preprocessing('/path/to/file.csv')
although it works fine, i would like to be able to expand the code easily and have smaller classes instead of having one large class. So i decided to use abstract class:
import pandas as pd
from abc import ABC, abstractmethod
my_data = pd.read_csv('/Users/kgz/Desktop/german_web_scraping/file.csv')
class Preprocessing(ABC):
#abstractmethod
def processor(self):
pass
class RemovePunctuation(Preprocessing):
def processor(self):
return my_data['text'].str.replace('#', '')
class RemoveHyphen(Preprocessing):
def processor(self):
return my_data['text'].str.replace('-', '')
class Removewords(Preprocessing):
def processor(self):
return my_data['text'].str.replace('reference', '')
final_result = [cls().processor() for cls in Preprocessing.__subclasses__()]
print(final_result)
So now each class is responsible for one task but there are a few issues I do not know how to handle since I am new to abstract classes. first, I am reading the file outside the classes, and I am not sure if that is good practice? if not, should i pass it as an argument to the processor function or have another class who is responsible to read the data.
Second, having one class with several functions allowed for a flow, so every transformation happened in order (i.e, first punctuation is removes, then hyphen is removed,...etc) but I do not know how to handle this order and dependency in abstract classes.

Extract item for each spider in scrapy project

I have over a dozen spiders in a scrapy project with variety of items being extracted from different sources, including others elements mostly i have to copy same regex code over and over again in each spider for example
item['element'] = re.findall('my_regex', response.text)
I use this regex to get same element which is defined in scrapy items, is there a way to avoid copying? where do i put this in project so that i don't have to copy this in each spider and only add those that are different.
my project structure is default
any help is appreciated thanks in advance
So if I understand your question correctly, you want use the same regular expression across multiple spiders.
You can do this:
create a python module called something like regex_to_use
inside that module place your regular expression.
example:
# regex_to_use.py
regex_one = 'test'
You can access this express this one in your spiders.
# spider.py
import regex_to_use
import re as regex
find_string = regex.search(regex_to_use.regex_one, ' this is a test')
print(find_string)
# output
<re.Match object; span=(11, 15), match='test'>
You could also do something like this in your regex_to_use module
# regex_to_use.py
import re as regex
class CustomRegularExpressions(object):
def __init__(self, text):
"""
:param text: string containing the variable to search for
"""
self._text = text
def search_text(self):
find_xyx = regex.search('test', self._text)
return find_xyx
and you would call it this way in your spiders:
# spider.py
from regex_to_use import CustomRegularExpressions
find_word = CustomRegularExpressions('this is a test').search_text()
print(find_word)
# output
<re.Match object; span=(10, 14), match='test'>
If you have multiple regular expressions you could do something like this:
# regex_to_use.py
import re as regex
class CustomRegularExpressions(object):
def __init__(self, text):
"""
:param text: string containing the variable to search for
"""
self._text = text
def search_text(self, regex_to_use):
regular_expressions = {"regex_one": 'test_1', "regex_two": 'test_2'}
expression = ''.join([v for k, v in regular_expressions.items() if k == regex_to_use])
find_xyx = regex.search(expression, self._text)
return find_xyx
# spider.py
from regex_to_use import CustomRegularExpressions
find_word = CustomRegularExpressions('this is a test').search_text('regex_one')
print(find_word)
# output
<re.Match object; span=(10, 14), match='test'>
You can also use a staticmethod in the class CustomRegularExpressions
# regex_to_use.py
import re as regex
class CustomRegularExpressions:
#staticmethod
def search_text(regex_to_use, text_to_search):
regular_expressions = {"regex_one": 'test_1', "regex_two": 'test_2'}
expression = ''.join([v for k, v in regular_expressions.items() if k == regex_to_use])
find_xyx = regex.search(expression, text_to_search)
return find_xyx
# spider.py
from regex_to_use import CustomRegularExpressions
# find_word would be replaced with item['element']
# this is a test would be replaced with response.text
find_word = CustomRegularExpressions.search_text('regex_one', 'this is a test')
print(find_word)
# output
<re.Match object; span=(10, 14), match='test'>
If you use docstrings in the function search_text() you can see the regular expressions in the Python dictionary.
Showing how all this works...
This is a python project that I wrote and published. Take a look at the folder utilities. In this folder I have functions that I can use throughout my code without having to copy and paste the same code over and over.
There is a lot of common data that is usual to use across multiple spiders, like regex or even XPath.
It's a good idea to isolate them.
You can use something like this:
/project
/site_data
handle_responses.py
...
/spiders
your_spider.py
...
Isolate functionalities with a common purpose.
# handle_responses.py
# imports ...
from re import search
def get_specific_commom_data(text: str):
# probably is a good idea handle predictable errors here (`try except`)
return search('your_regex', text)
And just use where is needed that functionality.
# your_spider.py
# imports ...
import scrapy
from site_data.handle_responses import get_specific_commom_data
class YourSpider(scrapy.Spider):
# ... previous code
def your_method(self, response):
# ... previous code
item['element'] = get_specific_commom_data(response.text)
Try to keep it simple and do what you need to solve your problem.
I can copy regex in multiple spiders instead of importing object from other .py files, i understand they have the use case but here i don't want to add anything to any of the spiders but still want the element in result
There are some good answers to this but don't really solve the problem so after searching for days i have come to this solution i hope its useful for others looking for similar answer.
#middlewares.py
import yourproject.items import youritem()
#find the function and add your element
def process_spider_output(self, response, result, spider):
item = YourItem()
item['element'] = re.findall('my_regex', response.text)
now uncomment middleware from
#settings.py
SPIDER_MIDDLEWARES = {
'yourproject.middlewares.YoursprojectMiddleware': 543,
}
For each spider you will get element in result data, i am still searching for better solution and i will update the answer because it slows the spider,

Python: Pseudo Enums - Classes as enums - how to avoid cyclic import

I want to create a pseudo enums in my python project.
The values are actually classes.
# file my_enums.py
import MyClass1
import MyClass2
import MyClass3
class MyEnum:
MY_CLASS_1 = MyClass1
MY_CLASS_2 = MyClass2
MY_CLASS_3 = MyClass3
# file my_class1.py
import MyEnum
class MyClass1:
def foo(self, x):
print(isinstance(x, MyEnum.MY_CLASS_2))
Doing this will result in cyclic import error.
I want to be able to use the MyEnum values in isinstance function and to import the enum to modules that define some of those classes.
Is there a way to do so?
Solution:
# file my_enums.py
import MyClass1
import MyClass2
import MyClass3
class MyEnum:
MY_CLASS_1 = None
MY_CLASS_2 = None
MY_CLASS_3 = None
#classmethod
def define(cls):
cls.MY_CLASS_1 = MyClass1
MyEnum.define()
The thing to remember is that when a module is loaded, it is executed -- but only top level statements and the immediate interior of top-level classes; the bodies of functions and methods are not evaluated until they are actually called.
# example module
CONSTANT = 7 # top-level, executed
def a_func(value=CONSTANT): # top-level, executed
return value + 9 # body, not executed
class a_class(metaclass=SomeMeta): # top-level, executed (and error as SomeMeta
# has not been defined nor imported)
CLS_CONSTANT = 3 # top-level class body, executed
def a_method(self): # executed
return self.CLS_CONSTANT + FUTURE_CONSTANT # method body, not executed
FUTURE_CONSTANT = 11
So in your example you need to make sure and not use MyEnum anywhere in my_class1.py that will be executed during import, and put the import of my_enums.py at the very end -- then when my_enums.py is executed during its import it will be able to import my_class1 which will, at that point, have the classes defined.

Is there a pythonic way to replace global variable or class variable

I used class variables to replace avoid using global variables,But I think it's not a pythonic way,Can any one give me a better way?
class variable way:
class A(object):
_func_map=dynamic_load_from_module()
_ad_map = dynamic_load_from_another_module()
#classmethod
def check_status(cls,host,port,user):
#do something other
return cls._func_map[user].verify()
#classmethod
def check_ad(cls,host,port,user):
#do something other
return cls._ad_map[user].check()
global variable way:
_global_func_map = dynamic_load_from_module()
_global_ad_map = dynamic_load_from_another_module()
def check_status(host,port,user):
#do something other
global _global_func_map
return _global_func_map[user].verify()
def check_ad(host,port,user):
#do something other
global _ad_map
return _ad_map[user].check()
I guess the most pythonic way is a module:
#!/usr/env/bin/python
def dynamic_load_from_module(): pass
def dynamic_load_from_another_module(): pass
fun_map = dynamic_load_from_module()
ad_map = dynamic_load_from_another_module()
So you use it like a module:
from module import fun_map, ad_map
class Foo(object):
#classmethod
def check_ad(cls,host,port,user):
return ad_map[user].check()
But if you need them to be called at the construction of your instance, you may want to do something like
#!/usr/env/bin/python
def dynamic_load_from_module(): pass
def dynamic_load_from_another_module(): pass
(So you just define functions within the module)
from module import dynamic_load_from_module, dynamic_load_from_another_module
class Foo(object):
def __init__(self):
self._fun_map = dynamic_load_from_module()
self._ad_map = dynamic_load_from_another_module()
Or if you need them to be called at the construction of the instance but still be class' attribute:
from module import dynamic_load_from_module, dynamic_load_from_another_module
class Foo(object):
_fun_map = dynamic_load_from_module()
_ad_map = dynamic_load_from_another_module()
There lot of other ways (properties, class method, static methods, etc..), but I'm pretty sure the module is the most pythonic one. Moreover it's really easy to setup, read and understand - so why not.

Resources