I want my dataclass to have a field that can either be provided manually, or if it isn't, it is inferred at initialization from the other fields. MWE:
from collections.abc import Sized
from dataclasses import dataclass
from typing import Optional


@dataclass
class Foo:
    """Dataclass whose ``index`` is inferred from ``data`` when not given."""

    data: Sized
    index: Optional[list[int]] = None

    def __post_init__(self):
        # Fall back to the full range of indices when the caller passed nothing.
        if self.index is None:
            self.index = list(range(len(self.data)))


# reveal_type() is a mypy-only construct and is not defined at runtime,
# so the calls are shown as comments here:
# reveal_type(Foo.index)           # Union[None, list[int]]
# reveal_type(Foo([1, 2, 3]).index)  # Union[None, list[int]]
How can this be implemented in a way such that:
It complies with mypy type checking
index is guaranteed to be of type list[int]
I considered using field(default_factory=list); however, then how does one distinguish the user passing index=[] from the sentinel value? Is there a proper solution besides doing
index: list[int] = None # type: ignore[assignment]
You can have the default_factory return a list with a sentinel object as its only element. You just need to make sure that the sentinel is an instance of int, otherwise mypy will complain. Luckily we have identity comparisons to ensure that the check in __post_init__ is always correct.
from collections.abc import Sized
from dataclasses import dataclass, field
#dataclass
class Foo:
class _IdxSentinel(int):
pass
_idx_sentinel = _IdxSentinel()
#staticmethod
def _idx_sentinel_factory() -> list[int]:
return [Foo._idx_sentinel]
data: Sized
index: list[int] = field(default_factory=_idx_sentinel_factory)
def __post_init__(self) -> None:
if len(self.index) == 1 and self.index[0] is self.__class__._idx_sentinel:
self.index = list(range(len(self.data)))
I put the entire factory and sentinel logic inside of Foo, but if you don't like that, you can also factor it out:
from collections.abc import Sized
from dataclasses import dataclass, field


class _IdxSentinel(int):
    """Module-level sentinel type; identity (not value) marks the default."""


_idx_sentinel = _IdxSentinel()


def _idx_sentinel_factory() -> list[int]:
    # Returns a fresh list each call, as default_factory requires.
    return [_idx_sentinel]


@dataclass
class Foo:
    data: Sized
    index: list[int] = field(default_factory=_idx_sentinel_factory)

    def __post_init__(self) -> None:
        # An identity check cannot collide with any caller-supplied int.
        if len(self.index) == 1 and self.index[0] is _idx_sentinel:
            self.index = list(range(len(self.data)))
EDIT: Inspired by the comment from @SUTerliakov, here is a slightly less verbose version that still satisfies type checkers and linters, using a lambda expression instead of a named function:
from collections.abc import Sized
from dataclasses import dataclass, field


@dataclass
class Foo:
    class _IdxSentinel(int):
        """Sentinel type; instances are compared by identity only."""

    _idx_sentinel = _IdxSentinel()

    data: Sized
    # The lambda keeps linters satisfied while avoiding a named factory
    # function; Foo is resolved lazily, at instantiation time.
    index: list[int] = field(default_factory=lambda: [Foo._idx_sentinel])

    def __post_init__(self) -> None:
        if len(self.index) == 1 and self.index[0] is self.__class__._idx_sentinel:
            self.index = list(range(len(self.data)))
Use NotImplemented
from collections.abc import Sized
from dataclasses import dataclass


@dataclass
class Foo:
    data: Sized
    # NotImplemented doubles as a ready-made sentinel that mypy accepts
    # (it is typed as Any), so no "# type: ignore" is needed.
    index: list[int] = NotImplemented

    def __post_init__(self):
        if self.index is NotImplemented:
            self.index = list(range(len(self.data)))
Related
I want to define a function for base classes and get the right return type for calls with derived classes. E.g.
# Module 1:
from typing import TypeVar


class Food:
    pass


class Animal:
    def __init__(self, food: Food) -> None:
        self.food = food


T = TypeVar("T", bound=Food)
S = TypeVar("S", bound=Animal)


def get_food(animal: S) -> T:  # Illustrates what I want but not working.
    return animal.food


food = get_food(Animal(Food()))
# reveal_type(food)  # Food.  (mypy-only; not defined at runtime)


# Module 2:
class Carrot(Food):
    pass


class Rabbit(Animal):
    def __init__(self, food: Carrot) -> None:
        self.food = food


food = get_food(Rabbit(Carrot()))
# reveal_type(food)  # Food. Want Carrot.  (mypy-only)
The options I know are:
using the @overload decorator, but this means module 1 needs to be aware of the inheriting types in module 2 - which is a problem
have a new get_food in module 2 that delegates to module 1 and explicitly cast the return type:
def get_food(rabbit: Rabbit) -> Carrot:
    # Delegates to module 1's get_food and narrows the return type.
    # NOTE(review): module 1's get_food must be imported qualified or under
    # another name, otherwise this call recurses on itself.
    return cast(Carrot, get_food(rabbit))
Any better way?
You need to make your Animal class generic in food type. It means basically that any [non-strict] Animal subclass has some sort of food ([non-strict] subclass of Food) associated with it.
from typing import Generic, TypeVar


class Food:
    pass


_F = TypeVar("_F", bound=Food)


class Animal(Generic[_F]):
    """Generic in its food type: each Animal subclass fixes _F."""

    def __init__(self, food: _F) -> None:
        self.food = food


def get_food(animal: Animal[_F]) -> _F:
    # _F is inferred from the concrete Animal[...] passed in.
    return animal.food


food = get_food(Animal(Food()))
# reveal_type(food)  # N: Revealed type is "__main__.Food"  (mypy-only)


class Carrot(Food):
    pass


class Rabbit(Animal[Carrot]):
    pass


food = get_food(Rabbit(Carrot()))
# reveal_type(food)  # N: Revealed type is "__main__.Carrot"  (mypy-only)
Here's a playground link and a relevant documentation on generic classes.
Is it possible to add/overwrite a type hint in case of the following example?
The example is just to get an idea of what I mean, by no means is this something that I would use in this way.
from dataclasses import dataclass


def wrapper(f):
    """Ensure instance.user is populated before calling the wrapped method."""

    def deco(instance):
        if not instance.user:
            instance.user = data(name="test")
        return f(instance)

    return deco


@dataclass
class data:
    name: str


class test_class:
    def __init__(self):
        self.user: None | data = None

    @wrapper
    def test(self):
        print(self.user.name)


x = test_class()
x.test()
The issue is that the type checker does not understand that the decorated method's user attribute is not None, thus showing a linting error that "name" is not a known member of "None".
Of course this code could be altered so that instead of using a decorator it would just do something like this:
def test(self):
    # Explicit in-method check instead of relying on the decorator;
    # the type checker narrows self.user via the truthiness test.
    if not self.user:
        ...
    print(self.user.name)
But that is not the point. I just want to know if it is possible to let the type hinter know that the attribute is not None. I could also just suppress the warning but that is not what I am looking for.
I would use the good ol' assert and be done with it:
...


@wrapper
def test(self):
    # Narrow self.user for the type checker (and fail fast at runtime
    # if the wrapper contract is ever broken).
    assert isinstance(self.user, data)
    print(self.user.name)
I realize this is a crude way as opposed to some annotation magic you might have expected for the decorator, but in my opinion this is the most practical approach.
There are countless other situations that can be constructed, where the type of some instance attribute may be altered externally. In those cases the use of such a simple assertion is not only for the benefit of the static type checker, but can also save you from shooting yourself in the foot, if you decide to alter that external behavior.
Alternative - Getter
Another possibility is to make the user attribute private and add a function (or property) to get it, which ensures that it is not None. Here is a working example:
from __future__ import annotations

from collections.abc import Callable
from dataclasses import dataclass
from typing import TypeVar

T = TypeVar("T")


@dataclass
class Data:
    name: str


def wrapper(f: Callable[[TestClass], T]) -> Callable[[TestClass], T]:
    """Populate the user property (via its setter) before calling f."""

    def deco(self: TestClass) -> T:
        try:
            _ = self.user  # the getter raises if still unset
        except RuntimeError:
            self.user = Data(name="test")
        return f(self)

    return deco


class TestClass:
    def __init__(self) -> None:
        self._user: None | Data = None

    @property
    def user(self) -> Data:
        # Guarantees a non-None return type to the type checker.
        if self._user is None:
            raise RuntimeError
        return self._user

    @user.setter
    def user(self, data: Data) -> None:
        self._user = data

    @wrapper
    def test(self) -> None:
        print(self.user.name)


if __name__ == '__main__':
    x = TestClass()
    x.test()
Depending on the use case, this might actually be preferred because otherwise, user being a public attribute, all outside code that wants to use TestClass will face the same problem of never being sure if user is None or not, thus being forced to do the same checks again and again.
Sadly there isn't really a satisfactory answer to your question. The problem is that no type-checkers execute any code - that means that any dynamic type generation doesn't work. For that reason, if you want to tell the type-checker that the self.user is not None you need to create a class where user is not Optional.
I don't think it's a good idea but here is how you could achieve what you want to achieve. Note though that that way you need to keep the two classes in sync and some type-checkers have trouble with decorators...
from typing import ParamSpec, TypeVar, Concatenate, Callable, cast
from dataclasses import dataclass
T = TypeVar("T") # generic return value
P = ParamSpec("P") # all other params after self
def wrapper( # this wrapper works on any functions in 'test_class'
f: Callable[Concatenate["test_class", P], T]
) -> Callable[Concatenate["__non_optional_user_test_class", P], T]:
def deco(instance: "test_class", *args: P.args, **kwargs: P.kwargs):
if not instance.user:
instance.user = data(name="test")
return f(cast("__non_optional_user_test_class", instance), *args, **kwargs)
return deco
#dataclass
class data:
name: str
class __non_optional_user_test_class:
user: data
class test_class:
def __init__(self):
self.user: None | data = None
#wrapper
def test(self):
print(self.user.name)
x = test_class()
x.test()
You sadly cannot generate the __non_optional_user_test_class dynamically in such a way that type-checkers understand them...
And you would need to write a new wrapper for all classes where you want to apply this @wrapper.
There's a base data class as follows:
class BaseClass:
    """Base data holder with a single untyped attribute."""

    def __init__(self, attribute_1: Any):
        self.attribute_1 = attribute_1
There's an inherited data class using the above class as base class:
class DataClass(BaseClass):
    """Extends BaseClass with two additional typed attributes."""

    def __init__(self, attribute_1: Any, attribute_2: Dict[str, str], attribute_3: List[str]):
        super().__init__(attribute_1)
        self.attribute_2 = attribute_2
        self.attribute_3 = attribute_3
There's another BaseClass which expects an instance of BaseClass to work as follows:
class BaseActionClass:
    """Holds a BaseClass instance and defines overridable actions."""

    # (fixed: the original __init__ line was missing its trailing colon)
    def __init__(self, attribute_a1: BaseClass, attribute_a2: Dict[str, str]):
        self.attribute_a1 = attribute_a1
        self.attribute_a2 = attribute_a2

    def do_action_one(self):
        pass

    def do_action_two(self):
        pass
There's an ActionClass which uses this BaseActionClass to perform some actions:
class ActionClass(BaseActionClass):
    def __init__(self, attribute_a1: DataClass, attribute_a2: Dict[str, str]):
        super().__init__(attribute_a1, attribute_a2)

    def do_action_one(self):
        do_statement_1  # placeholder pseudo-code from the question
        x = self.attribute_a1.attribute_1
        y = self.attribute_a1.attribute_2  # <- PyCharm flags this line

    def do_action_two(self):
        do_something  # placeholder pseudo-code from the question
In ActionClass.do_action_one, when writing y = self.attribute_a1.attribute_2, PyCharm shows a typing error of Unresolved attribute reference 'attribute_2' for class 'BaseClass'. How to resolve this typing error which is shown by the IDE, and why would this happen since DataClass is already inheriting from BaseClass?
The problem now is that ActionClass.attribute_a1 is still BaseClass, because its base was declared so. It is absolutely fine, because in ActionClass you do not enforce attribute_a1 to be DataClass, but only limit __init__ method to it. Were it another method (not __init__, but, say, set_attribute_a1 - let's forget about properties for now), you would also violate LSP this way.
I can suggest two ways to go:
Generic
I'd assume BaseClass and DataClass definitions as yours. Then the following will work:
from typing import Generic, TypeVar

_T = TypeVar('_T', bound=BaseClass)


class BaseActionClass(Generic[_T]):
    """Generic in the concrete BaseClass subclass it holds."""

    def __init__(self, attribute_a1: _T, attribute_a2: dict[str, str]) -> None:
        # Annotating the attribute with _T is what lets subclasses narrow it.
        self.attribute_a1: _T = attribute_a1
        self.attribute_a2 = attribute_a2

    def do_action_one(self) -> None:
        pass


class ActionClass(BaseActionClass[DataClass]):
    # Note you don't even need to override __init__ now, it follows from generic defn
    def do_action_one(self) -> None:
        self.attribute_a1.attribute_1
        self.attribute_a1.attribute_2
Direct
class BaseActionClass:
    # (fixed: the original __init__ line was missing its trailing colon)
    def __init__(self, attribute_a1: BaseClass, attribute_a2: dict[str, str]):
        self.attribute_a1 = attribute_a1
        self.attribute_a2 = attribute_a2

    def do_action_one(self):
        pass

    def do_action_two(self):
        pass


class ActionClass(BaseActionClass):
    # Re-declare the attribute with the narrower type so the checker knows it.
    # (fixed typo: the original declared "attribute_1", but the attribute
    # being narrowed and used below is "attribute_a1")
    attribute_a1: DataClass

    def __init__(self, attribute_a1: DataClass, attribute_a2: dict[str, str]):
        super().__init__(attribute_a1, attribute_a2)

    def do_action_one(self):
        self.attribute_a1.attribute_1
        self.attribute_a1.attribute_2
The former solution is preferred, because it reveals your intention initially and is more semantically correct. It means roughly the following: class BaseActionClass has attribute_a1 parameter of type _T, which can be substituted by any BaseClass subclass (including BaseClass itself). When you subclass BaseActionClass[DataClass], you enforce _T substitution with DataClass. You can still do BaseActionClass(BaseClass(), {}) and _T will be BaseClass, but ActionClass(BaseClass(), {}) is rejected now.
The latter solution is much less elegant. I'd advise using it only if you don't have access to modify BaseActionClass (for example, it is in a 3rd-party module and you can't or don't want to create a PR for it).
I want to validate, from the moment of instance creation, whether the types are right or wrong.
I tried using the @dataclass decorator, but it doesn't let me define my own __init__ method; I also tried using a custom class-like type.
Also, depending on the type, I want to perform some validations (if it is an int, check that the field > 0; or if it is a str, strip whitespace, for example).
I could use a dict to validate the types, but I want to know if there's a way to do it in a Pythonic way.
class Car(object):
    """ My class with many fields """

    color: str
    name: str
    wheels: int

    def __init__(self):
        """ Get the type of fields and validate """
        pass
You can use the __post_init__ method of dataclasses to do your validations.
Below I just confirm that everything is an instance of the indicated type
from dataclasses import dataclass, fields


def validate(instance):
    """Raise ValueError if any field's value does not match its annotation."""
    for field in fields(instance):
        attr = getattr(instance, field.name)
        if not isinstance(attr, field.type):
            msg = "Field {0.name} is of type {1}, should be {0.type}".format(field, type(attr))
            raise ValueError(msg)


@dataclass
class Car:
    color: str
    name: str
    wheels: int

    def __post_init__(self):
        # Runs right after the generated __init__, so bad instances
        # never escape construction.
        validate(self)
An alternative to #dataclass is to use pyfields. It provides validation and conversion out of the box, and is directly done at the field level so you can use fields inside any class, without modifying them in any way.
from pyfields import field, init_fields
from valid8.validation_lib import is_in

ALLOWED_COLORS = ('blue', 'yellow', 'brown')


class Car(object):
    """ My class with many fields """

    color: str = field(check_type=True, validators=is_in(ALLOWED_COLORS))
    name: str = field(check_type=True, validators={'should be non-empty': lambda s: len(s) > 0})
    wheels: int = field(check_type=True, validators={'should be positive': lambda x: x > 0})

    @init_fields
    def __init__(self, msg="hello world!"):
        print(msg)


c = Car(color='blue', name='roadie', wheels=3)
c.wheels = 'hello'  # <-- (1) type validation error, see below
c.wheels = 0  # <-- (2) value validation error, see below
yields the following two errors
TypeError: Invalid value type provided for '<...>.Car.wheels'.
Value should be of type <class 'int'>. Instead, received a 'str': 'hello'
and
valid8.entry_points.ValidationError[ValueError]:
Error validating [<...>.Car.wheels=0].
InvalidValue: should be positive.
Function [<lambda>] returned [False] for value 0.
See pyfields documentation for details. I'm the author by the way :)
Dataclasses do not check the data. But I made a small superstructure for dataclasses, and you can use it this way:
import json
from dataclasses import dataclass

from validated_dc import ValidatedDC


@dataclass
class Car(ValidatedDC):
    color: str
    name: str
    wheels: int


# This string was received by api
data = '{"color": "gray", "name": "Delorean", "wheels": 4}'

# Let's upload this json-string to the dictionary
data = json.loads(data)

car = Car(**data)
assert car.get_errors() is None

# Let's say the key "color" got the wrong value:
data['color'] = 11111
car = Car(**data)
assert car.get_errors()

print(car.get_errors())
# {
#     'color': [
#         BasicValidationError(
#             value_repr='11111', value_type=<class 'int'>,
#             annotation=<class 'str'>, exception=None
#         )
#     ]
# }

# fix
car.color = 'gray'

# is_valid() - Starting validation of an already created instance
# (if True returns, then there are no errors)
assert car.is_valid()
assert car.get_errors() is None
ValidatedDC: https://github.com/EvgeniyBurdin/validated_dc
Use pydantic.
In this example, the field password1 is only validated for being a string, while other fields have custom validator functions.
from pydantic import BaseModel, ValidationError, validator


class UserModel(BaseModel):
    name: str
    username: str
    password1: str
    password2: str

    @validator('name')
    def name_must_contain_space(cls, v):
        if ' ' not in v:
            raise ValueError('must contain a space')
        return v.title()

    @validator('password2')
    def passwords_match(cls, v, values, **kwargs):
        # 'values' holds previously-validated fields, so password1 is
        # present only if its own validation succeeded.
        if 'password1' in values and v != values['password1']:
            raise ValueError('passwords do not match')
        return v

    @validator('username')
    def username_alphanumeric(cls, v):
        assert v.isalnum(), 'must be alphanumeric'
        return v


user = UserModel(
    name='samuel colvin',
    username='scolvin',
    password1='zxcvbn',
    password2='zxcvbn',
)
print(user)
#> name='Samuel Colvin' username='scolvin' password1='zxcvbn' password2='zxcvbn'

try:
    UserModel(
        name='samuel',
        username='scolvin',
        password1='zxcvbn',
        password2='zxcvbn2',
    )
except ValidationError as e:
    print(e)
    """
    2 validation errors for UserModel
    name
      must contain a space (type=value_error)
    password2
      passwords do not match (type=value_error)
    """
I have a method that returns dynamic type based on the class I pass in:
def foo(cls):
    """Instantiate the given class and return the new instance."""
    return cls()
How can I set up typing for this function?
After reading this article https://blog.yuo.be/2016/05/08/python-3-5-getting-to-grips-with-type-hints/, I found the solution myself:
from typing import TypeVar, Type


class A:
    def a(self):
        return 'a'


class B(A):
    def b(self):
        return 'b'


T = TypeVar('T')


def foo(a: T) -> T:
    # NOTE(review): as in the original post, the annotation really ought to
    # be "a: Type[T]" since a class (not an instance) is passed in.
    return a()
This template suits my question above, but my actual need is a little different, so I had to work on it some more. Below I include my problem and solution:
Problem: I want to use the with keyword like this:
with open_page(PageX) as page:
    page.method_x()  # method x is from PageX
Solution
from typing import TypeVar, Type, Generic

T = TypeVar('T')


def open_page(cls: Type[T]):
    """Return a context manager whose __enter__ yields a new cls() instance."""

    class __F__(Generic[T]):
        def __init__(self, cls: Type[T]):
            self._cls = cls

        def __enter__(self) -> T:
            # The T annotation is what lets IDEs suggest cls's methods.
            return self._cls()

        def __exit__(self, exc_type, exc_val, exc_tb):
            pass

    return __F__(cls)
So, when I use it with PyCharm, it is able to suggest method_x when I pass PageX into "with open_page(PageX) as page:".