How to validate typing attributes in Python 3.7 - python-3.x

I want to validate the types of the fields as soon as an instance is created. I tried the @dataclass decorator, but it doesn't let me write my own __init__ method. I also tried something like a custom class type,
and, depending on the type, I want to run some validations (if it's an int, check that the field is > 0; if it's a str, strip whitespace, for example).
I could use a dict mapping fields to types to do the validation, but I'd like to know if there's a more Pythonic way to do it.
class Car(object):
    """ My class with many fields """
    color: str
    name: str
    wheels: int

    def __init__(self):
        """ Get the type of fields and validate """
        pass

You can use the __post_init__ method of dataclasses to do your validations.
Below I just confirm that everything is an instance of the indicated type:
from dataclasses import dataclass, fields

def validate(instance):
    for field in fields(instance):
        attr = getattr(instance, field.name)
        if not isinstance(attr, field.type):
            msg = "Field {0.name} is of type {1}, should be {0.type}".format(field, type(attr))
            raise ValueError(msg)

@dataclass
class Car:
    color: str
    name: str
    wheels: int

    def __post_init__(self):
        validate(self)
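For example (a quick usage sketch with the Car dataclass above), a wrongly typed field now raises as soon as the instance is created:
Car(color='red', name='roadie', wheels=4)       # passes validation
Car(color='red', name='roadie', wheels='four')  # ValueError: Field wheels is of type <class 'str'>, should be <class 'int'>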

An alternative to @dataclass is to use pyfields. It provides validation and conversion out of the box, and it works directly at the field level, so you can use fields inside any class without modifying the class in any way.
from pyfields import field, init_fields
from valid8.validation_lib import is_in

ALLOWED_COLORS = ('blue', 'yellow', 'brown')

class Car(object):
    """ My class with many fields """
    color: str = field(check_type=True, validators=is_in(ALLOWED_COLORS))
    name: str = field(check_type=True, validators={'should be non-empty': lambda s: len(s) > 0})
    wheels: int = field(check_type=True, validators={'should be positive': lambda x: x > 0})

    @init_fields
    def __init__(self, msg="hello world!"):
        print(msg)

c = Car(color='blue', name='roadie', wheels=3)
c.wheels = 'hello'  # <-- (1) type validation error, see below
c.wheels = 0        # <-- (2) value validation error, see below
This yields the following two errors:
TypeError: Invalid value type provided for '<...>.Car.wheels'.
Value should be of type <class 'int'>. Instead, received a 'str': 'hello'
and
valid8.entry_points.ValidationError[ValueError]:
Error validating [<...>.Car.wheels=0].
InvalidValue: should be positive.
Function [<lambda>] returned [False] for value 0.
See the pyfields documentation for details. I'm the author, by the way :)

Dataclasses do not check the data. But I made a small superstructure for dataclasses, and you can use it this way:
import json
from dataclasses import dataclass
from validated_dc import ValidatedDC

@dataclass
class Car(ValidatedDC):
    color: str
    name: str
    wheels: int

# This string was received by an API
data = '{"color": "gray", "name": "Delorean", "wheels": 4}'
# Let's load this JSON string into a dictionary
data = json.loads(data)

car = Car(**data)
assert car.get_errors() is None

# Let's say the key "color" got the wrong value:
data['color'] = 11111
car = Car(**data)
assert car.get_errors()
print(car.get_errors())
# {
#     'color': [
#         BasicValidationError(
#             value_repr='11111', value_type=<class 'int'>,
#             annotation=<class 'str'>, exception=None
#         )
#     ]
# }

# fix
car.color = 'gray'
# is_valid() - starts validation of an already created instance
# (if it returns True, there are no errors)
assert car.is_valid()
assert car.get_errors() is None
ValidatedDC: https://github.com/EvgeniyBurdin/validated_dc

Use pydantic.
In this example, the field password1 is only validated for being a string, while other fields have custom validator functions.
from pydantic import BaseModel, ValidationError, validator

class UserModel(BaseModel):
    name: str
    username: str
    password1: str
    password2: str

    @validator('name')
    def name_must_contain_space(cls, v):
        if ' ' not in v:
            raise ValueError('must contain a space')
        return v.title()

    @validator('password2')
    def passwords_match(cls, v, values, **kwargs):
        if 'password1' in values and v != values['password1']:
            raise ValueError('passwords do not match')
        return v

    @validator('username')
    def username_alphanumeric(cls, v):
        assert v.isalnum(), 'must be alphanumeric'
        return v

user = UserModel(
    name='samuel colvin',
    username='scolvin',
    password1='zxcvbn',
    password2='zxcvbn',
)
print(user)
#> name='Samuel Colvin' username='scolvin' password1='zxcvbn' password2='zxcvbn'

try:
    UserModel(
        name='samuel',
        username='scolvin',
        password1='zxcvbn',
        password2='zxcvbn2',
    )
except ValidationError as e:
    print(e)
"""
2 validation errors for UserModel
name
  must contain a space (type=value_error)
password2
  passwords do not match (type=value_error)
"""

Related

Dataclass Optional Field that is Inferred if Missing

I want my dataclass to have a field that can either be provided manually or, if it isn't, inferred at initialization from the other fields. MWE:
from collections.abc import Sized
from dataclasses import dataclass
from typing import Optional

@dataclass
class Foo:
    data: Sized
    index: Optional[list[int]] = None

    def __post_init__(self):
        if self.index is None:
            self.index = list(range(len(self.data)))

reveal_type(Foo.index)            # Union[None, list[int]]
reveal_type(Foo([1, 2, 3]).index) # Union[None, list[int]]
How can this be implemented in a way such that:
It complies with mypy type checking
index is guaranteed to be of type list[int]
I considered using default_factory(list); however, how does one then distinguish the user passing index=[] from the sentinel value? Is there a proper solution besides doing
index: list[int] = None  # type: ignore[assignment]
You can have the default_factory return a list with a sentinel object as its only element. You just need to make sure that the sentinel is an instance of int, otherwise mypy will complain. Luckily we have identity comparisons to ensure that the check in __post_init__ is always correct.
from collections.abc import Sized
from dataclasses import dataclass, field

@dataclass
class Foo:
    class _IdxSentinel(int):
        pass

    _idx_sentinel = _IdxSentinel()

    @staticmethod
    def _idx_sentinel_factory() -> list[int]:
        return [Foo._idx_sentinel]

    data: Sized
    index: list[int] = field(default_factory=_idx_sentinel_factory)

    def __post_init__(self) -> None:
        if len(self.index) == 1 and self.index[0] is self.__class__._idx_sentinel:
            self.index = list(range(len(self.data)))
I put the entire factory and sentinel logic inside of Foo, but if you don't like that, you can also factor it out:
from collections.abc import Sized
from dataclasses import dataclass, field

class _IdxSentinel(int):
    pass

_idx_sentinel = _IdxSentinel()

def _idx_sentinel_factory() -> list[int]:
    return [_idx_sentinel]

@dataclass
class Foo:
    data: Sized
    index: list[int] = field(default_factory=_idx_sentinel_factory)

    def __post_init__(self) -> None:
        if len(self.index) == 1 and self.index[0] is _idx_sentinel:
            self.index = list(range(len(self.data)))
EDIT: Inspired by the comment from @SUTerliakov, here is a slightly less verbose version that still satisfies type checkers and linters, using a lambda expression instead of a named function:
from collections.abc import Sized
from dataclasses import dataclass, field

@dataclass
class Foo:
    class _IdxSentinel(int):
        pass

    _idx_sentinel = _IdxSentinel()

    data: Sized
    index: list[int] = field(default_factory=lambda: [Foo._idx_sentinel])

    def __post_init__(self) -> None:
        if len(self.index) == 1 and self.index[0] is self.__class__._idx_sentinel:
            self.index = list(range(len(self.data)))
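A quick sanity check (my own usage sketch, not part of the original answer) that an omitted index is inferred while an explicitly passed empty list is kept:
f = Foo([10, 20, 30])
print(f.index)  # [0, 1, 2]  (inferred from data)

f = Foo([10, 20, 30], index=[])
print(f.index)  # []  (an explicit empty list is not mistaken for the sentinel)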
Use NotImplemented
from collections.abc import Sized
from dataclasses import dataclass

@dataclass
class Foo:
    data: Sized
    index: list[int] = NotImplemented

    def __post_init__(self):
        if self.index is NotImplemented:
            self.index = list(range(len(self.data)))
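Usage would then look something like this (my sketch of the expected behaviour):
print(Foo([10, 20, 30]).index)       # [0, 1, 2]  (inferred from data)
print(Foo([10, 20, 30], [5]).index)  # [5]        (explicitly provided)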

Can I make a default value in pydantic if None is passed in the field without using validators?

I have the following code, but it seems to me that the validator here is superfluous for contract_ndfl. Is there any way to do without a validator?
My code:
class User(BaseModel):
    user: int
    s_name: str
    contract_ndfl: Optional[int]

    @validator('contract_ndfl')
    def set_contract_ndfl(cls, v):
        return v or 13
Wishful code:
class User(BaseModel):
    user: int
    s_name: str
    contract_ndfl: Optional[int] = 13
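Note that with a plain default like this, the default only applies when the field is omitted; an explicit contract_ndfl=None is valid for Optional[int] and stays None (assuming pydantic v1 semantics), which is presumably why a validator is needed:
u = User(user=1, s_name='Ann')                      # field omitted -> contract_ndfl == 13
u = User(user=1, s_name='Ann', contract_ndfl=None)  # explicit None -> contract_ndfl is None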
Maybe you can use a validator for all fields and define a base class for it:
class NoneDefaultModel(BaseModel):

    @validator("*", pre=True)
    def not_none(cls, v, field):
        if all(
            (
                # Cater for the occasion where field.default in (0, False)
                getattr(field, "default", None) is not None,
                v is None,
            )
        ):
            return field.default
        else:
            return v
Then you can use a subclass to implement your wishful code:
class Bar(NoneDefaultModel):
    bar: int = 5

Bar(bar=None)
# Bar(bar=5)

How to get a list of a Python class' variables with their docstring documentation

How can I get a list of class member variables and their docstring description from a Python class or dataclass?
I have this dataclass
@dataclass
class MyDataClass(BaseDataClass):
    """
    :param var1: Description for var1
    :param var2: Description for var2
    """
    var1: int
    var2: int
and I want to be able to get this information for it:
{
    'var1': ('int', 'Description for var1'),
    'var2': ('int', 'Description for var2')
}
To get a list of class member variables and their descriptions, I created an empty dataclass with the classmethod
get_user_parameters_dict(cls)
which parses the class docstring and returns a dict with each member's name, type and description.
Since the docstring is not auto-generated, you need to follow the example in the code below. The output of the main section is shown below.
Output:
This does not work for a class without properties
This class is probably BaseDataClass. Is it? Actual name: BaseDataClass
var1: int - Description for var1
var2: int - Description for var2
Code:
from dataclasses import dataclass
from typing import Dict, Generator, Tuple
import re

@dataclass
class BaseDataClass:
    """
    Example of how to docstring var1 and var2, so reflection works here
    :param var1: Description for var1
    :param var2: Description for var2
    """
    # Example of declaring properties var1 and var2
    # var1: int
    # var2: int

    @classmethod
    def get_user_parameters_dict(cls) -> Dict[str, Tuple[str, str]]:
        properties_descriptions: Dict[str, str] = {}
        regex = r":param *(.*): *(.*)"
        matches: Generator[re.Match, None, None] = re.finditer(regex, cls.__doc__, re.MULTILINE)
        for match in matches:
            prop_name: str = match.groups()[0]
            prop_description: str = match.groups()[1]
            properties_descriptions[prop_name] = prop_description
        dataclass_properties: Dict[str, Tuple[str, str]] = {}
        try:
            dataclass_annotations: Dict[str, object] = cls.__dict__['__annotations__']
            for prop_name, property_type in dataclass_annotations.items():
                dataclass_properties[prop_name] = (property_type.__name__, properties_descriptions[prop_name])
            return dataclass_properties
        except KeyError:
            print('This does not work for a class without properties')

@dataclass
class MyDataClass(BaseDataClass):
    """
    :param var1: Description for var1
    :param var2: Description for var2
    """
    var1: int
    var2: int

if __name__ == '__main__':
    a = BaseDataClass
    try:
        for property_name, property_metadata in BaseDataClass.get_user_parameters_dict().items():
            print(f'{property_name}: {property_metadata[0]} - {property_metadata[1]}')
    except AttributeError:
        print(f'This class is probably TestuserProperties. Is it? Actual name: {BaseDataClass.__name__}')
    print()
    for property_name, property_metadata in MyDataClass.get_user_parameters_dict().items():
        print(f'{property_name}: {property_metadata[0]} - {property_metadata[1]}')

provide additional constraints when declaring typing.NamedTuple object

Using a typing.NamedTuple object, what is the best way to enforce additional constraints on how it can be declared?
Let's say I have an Undergraduate class where the students have a major, but I want to enforce that 'undeclared' is an unacceptable value for the major.
from typing import NamedTuple

class Undergraduate(NamedTuple):
    name: str
    major: str

    def check_major(self):
        if self.major == "undeclared":
            raise ValueError("must declare a major")

if __name__ == "__main__":
    u1 = Undergraduate("Jane", "computer science")  # no errors
    u1.check_major()  # no errors
    u2 = Undergraduate("John", "undeclared")  # no errors
    u2.check_major()  # ValueError
This works fine, but I would like check_major() to run every time I declare a new object, i.e.:
u1 = Undergraduate("John", "undeclared")  # immediate ValueError raised
Is this possible using only a NamedTuple (I know how to do it using traditional classes)?
Note: I read this related question. Those answers provide something of a working solution, but like the OP I want to be able to instantiate the objects without requiring additional class methods to be called.
NamedTuple protects both __init__ and __new__ from being replaced at declaration. However, they can be replaced after the class was created.
from typing import NamedTuple

class Radial2D(NamedTuple):
    angle: float
    length: float

    def _verify_attributes_(self, *args):
        if self.length < 0 or not 0 < self.angle < 360:
            raise ValueError('Arguments out of range')

Radial2D.__init__ = Radial2D._verify_attributes_

print(Radial2D(90, 15.5))  # Radial2D(angle=90, length=15.5)
print(Radial2D(12, -5))    # ValueError: Arguments out of range
This pattern can be simplified using a class decorator:
from typing import Type, NamedTuple

def verify(tp: Type[NamedTuple]):
    verifier = tp._verify_attributes_
    tp.__init__ = verifier
    return tp

@verify
class Undergraduate(NamedTuple):
    name: str
    major: str

    def _verify_attributes_(self, *args):
        if self.major == "undeclared":
            raise ValueError("must declare a major")

print(Undergraduate("Jane", "computer science"))  # Undergraduate(name='Jane', major='computer science')
print(Undergraduate("John", "undeclared"))        # ValueError: must declare a major

Adding UUID type to Cerberus leads to BAD_TYPE error

I'm attempting to add custom data types to Cerberus. The UUID class works as expected on its own (it wraps the standard library uuid.UUID), but I'm not able to validate with the UUID type through Cerberus.
Secondarily, I was not able to register multiple types in an __init__ function, but that should probably be its own question.
Here's my custom validator that should register the given types.
import uuid

import cerberus

class UUID:
    name = "UUID"

    def __init__(self, potential_uuid: str):
        self.uuid = uuid.UUID(potential_uuid)

    def __str__(self):
        return str(self.uuid)

class Validator(cerberus.Validator):

    def _register_types(self) -> cerberus.Validator.types_mapping:
        types_mapping = Validator.types_mapping.copy()
        for schema_type in datatypes.ALL_TYPES:
            cerberus_type = cerberus.TypeDefinition(
                schema_type.name,
                (schema_type,),
                ())
            types_mapping[schema_type.name] = cerberus_type
        return types_mapping

    cerberus_type = cerberus.TypeDefinition(
        "UUID",
        (datatypes.UUID,),
        ())

    types_mapping = cerberus.Validator.types_mapping.copy()
    types_mapping["UUID"] = cerberus_type

    # def __init__(self, *args, **kwargs):
    #     types_mapping = self._register_types()
    #     super().__init__(*args, **kwargs)
And here are my unit tests for this code.
import pytest

@pytest.mark.unit
def test_valid_uuid():
    test_input = "35d6d5a0-6f37-4794-a493-2712eda41c1a"
    actual = UUID(test_input)
    assert str(actual) == "35d6d5a0-6f37-4794-a493-2712eda41c1a"

@pytest.mark.unit
def test_invalid_uuid():
    test_input = "Not a Valid UUID"
    with pytest.raises(ValueError):
        actual = UUID(test_input)

@pytest.mark.unit
def test_uuid_type_registration():
    test_schema = {"test_name": {"type": "UUID"}}
    validator = Validator(test_schema)
    test_record = {"test_name": "35d6d5a0-6f37-4794-a493-2712eda41c1a"}
    result = validator.validate(test_record)
    print(validator._errors)
    assert result == True
If we just give the UUID class a valid UUID string, it succeeds, but if we attempt to validate through Cerberus we get a BAD_TYPE validation error.
pytest tests/test_datatypes/test_datatypes.py
============================================================================================================================= test session starts ==============================================================================================================================
platform linux -- Python 3.7.2, pytest-4.3.1, py-1.8.0, pluggy-0.9.0
benchmark: 3.2.2 (defaults: timer=time.perf_counter disable_gc=False min_rounds=5 min_time=0.000005 max_time=1.0 calibration_precision=10 warmup=False warmup_iterations=100000)
rootdir: /home/vdev, inifile: setup.cfg
plugins: cov-2.6.1, benchmark-3.2.2
collected 4 items
tests/test_datatypes/test_datatypes.py ...F [100%]
=================================================================================================================================== FAILURES ===================================================================================================================================
_________________________________________________________________________________________________________________________ test_uuid_type_registration __________________________________________________________________________________________________________________________
    @pytest.mark.unit
    def test_uuid_type_registration():
        test_schema = {"test_name": {"type": "UUID"}}
        validator = Validator(test_schema)
        test_record = {"test_name": "35d6d5a0-6f37-4794-a493-2712eda41c1a"}
        result = validator.validate(test_record)
        print(validator._errors)
>       assert result == True
E       assert False == True

tests/test_datatypes/test_datatypes.py:30: AssertionError
----------------------------------------------------------------------------------------------------------------------------- Captured stdout call -----------------------------------------------------------------------------------------------------------------------------
[ValidationError @ 0x7fa477e10278 ( document_path=('test_name',),schema_path=('test_name', 'type'),code=0x24,constraint="UUID",value="35d6d5a0-6f37-4794-a493-2712eda41c1a",info=() )]
=============================================================================================================================== warnings summary ===============================================================================================================================
/usr/local/lib/python3.7/site-packages/cerberus/validator.py:14
/usr/local/lib/python3.7/site-packages/cerberus/validator.py:14
/usr/local/lib/python3.7/site-packages/cerberus/validator.py:14
/usr/local/lib/python3.7/site-packages/cerberus/validator.py:14
/usr/local/lib/python3.7/site-packages/cerberus/validator.py:14: DeprecationWarning: Using or importing the ABCs from 'collections' instead of from 'collections.abc' is deprecated, and in 3.8 it will stop working
from collections import Hashable, Iterable, Mapping, Sequence
/usr/local/lib/python3.7/site-packages/cerberus/errors.py:6
/usr/local/lib/python3.7/site-packages/cerberus/errors.py:6: DeprecationWarning: Using or importing the ABCs from 'collections' instead of from 'collections.abc' is deprecated, and in 3.8 it will stop working
from collections import defaultdict, namedtuple, MutableMapping
/usr/local/lib/python3.7/site-packages/cerberus/schema.py:3
/usr/local/lib/python3.7/site-packages/cerberus/schema.py:3: DeprecationWarning: Using or importing the ABCs from 'collections' instead of from 'collections.abc' is deprecated, and in 3.8 it will stop working
from collections import (Callable, Hashable, Iterable, Mapping,
-- Docs: https://docs.pytest.org/en/latest/warnings.html
================================================================================================================ 1 failed, 3 passed, 6 warnings in 0.33 seconds ===============================================================================================================
EDIT 1
Simplified example code
import cerberus
import uuid

class Validator(cerberus.Validator):
    types_mapping = {
        **cerberus.Validator.types_mapping,
        'UUID': cerberus.TypeDefinition('UUID', (uuid.UUID,), ())
    }
Same failure
    @pytest.mark.unit
    def test_uuid_type_registration():
        test_schema = {"test_name": {"type": "UUID"}}
        validator = es_client.Validator(test_schema)
        test_record = {"test_name": "35d6d5a0-6f37-4794-a493-2712eda41c1a"}
        result = validator.validate(test_record)
        print(validator._errors)
>       assert result == True
E       assert False == True

tests/test_datatypes/test_datatypes.py:30: AssertionError
------------------------------------------------------------ Captured stdout call -------------------------------------------------------------
[ValidationError @ 0x7fd9cdeed0b8 ( document_path=('test_name',),schema_path=('test_name', 'type'),code=0x24,constraint="UUID",value="35d6d5a0-6f37-4794-a493-2712eda41c1a",info=() )]
Could you clarify what the _register_types method is meant to do and when it is called?
This works, maybe it helps you find your error:
def test_issue_475():
    class UUID:
        def __init__(self, data):
            self.data = data

    class MyValidator(Validator):
        types_mapping = {
            **Validator.types_mapping,
            'UUID': TypeDefinition('UUID', (UUID,), ())
        }

    assert_success(
        {'field': UUID(0)},
        {'field': {'type': 'UUID'}},
        validator=MyValidator()
    )
Note that you mention the stdlib's UUID class, while you implement another one with the same name in your example.
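For what it's worth, Cerberus checks the document value with isinstance against the types registered in the TypeDefinition, so the failing test above presumably fails because it passes a plain str where a uuid.UUID instance is registered. A sketch using the simplified Validator from EDIT 1:
import uuid

test_schema = {"test_name": {"type": "UUID"}}
validator = Validator(test_schema)

# A plain string is not an instance of uuid.UUID, so this reports a BAD_TYPE error
assert not validator.validate({"test_name": "35d6d5a0-6f37-4794-a493-2712eda41c1a"})

# An actual uuid.UUID instance satisfies the registered type
assert validator.validate({"test_name": uuid.UUID("35d6d5a0-6f37-4794-a493-2712eda41c1a")})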
