Specify a one or many field using marshmallow - python-3.x

I would like to specify a schema field which accepts one or many resources. However, I seem to be able to specify only one behavior or the other.
>>> class Resource(marshmallow.Schema):
... data = marshmallow.fields.Dict()
...
>>> class ContainerSchema(marshmallow.Schema):
... resource = marshmallow.fields.Nested(ResourceSchema, many=True)
...
>>> ContainerSchema().dump({'resource': [{'data': 'DATA'}]})
MarshalResult(data={'resource': [{'data': 'DATA'}]}, errors={})
In the above example the resource must be given as a list. However, I would prefer not to require that:
>>> ContainerSchema().dump({'resource': {'data': 'DATA'}})
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/lib64/python3.6/site-packages/marshmallow/schema.py", line 513, in dump
**kwargs
File "/lib64/python3.6/site-packages/marshmallow/marshalling.py", line 147, in serialize
index=(index if index_errors else None)
File "/lib64/python3.6/site-packages/marshmallow/marshalling.py", line 68, in call_and_store
value = getter_func(data)
File "/lib64/python3.6/site-packages/marshmallow/marshalling.py", line 141, in <lambda>
getter = lambda d: field_obj.serialize(attr_name, d, accessor=accessor)
File "/lib64/python3.6/site-packages/marshmallow/fields.py", line 252, in serialize
return self._serialize(value, attr, obj)
File "/lib64/python3.6/site-packages/marshmallow/fields.py", line 448, in _serialize
schema._update_fields(obj=nested_obj, many=self.many)
File "/lib64/python3.6/site-packages/marshmallow/schema.py", line 760, in _update_fields
ret = self.__filter_fields(field_names, obj, many=many)
File "/lib64/python3.6/site-packages/marshmallow/schema.py", line 810, in __filter_fields
obj_prototype = obj[0]
KeyError: 0
Can I have a schema that allows either a single item or many of them?

The point of giving the arguments as a list - whether it's one or many - is so that the schema knows how to handle them in either case. For the schema to process arguments in a different format, such as a single item not wrapped in a list, you need to add a preprocessor to the schema, like this:
class ContainerSchema(marshmallow.Schema):
resource = marshmallow.fields.Nested(ResourceSchema, many=True)
@pre_dump
def wrap_indata(self, indata):
if type(indata['resource']) is dict:
indata['resource'] = [indata['resource']]

Related

Defining a Python Apache Beam schema from dictionary rows

I would like to obtain row schemas in Apache Beam (Python) for use with SQL transforms. However, I ran into the issue explained below.
The schema is defined as follows:
class RowSchema(typing.NamedTuple):
colA: str
colB: typing.Optional[str]
coders.registry.register_coder(RowSchema, coders.RowCoder)
The following example infers the schema correctly:
with beam.Pipeline(options=pipeline_options) as p:
pcol = (p
| "Create" >> beam.Create(
[
RowSchema(colA='a1', colB='b1'),
RowSchema(colA='a2', colB=None)])
.with_output_types(RowSchema)
| beam.Map(print)
)
The following attempt, however, raises "ValueError: Type names and field names must be valid identifiers: 'run.<locals>.RowSchema'"
with beam.Pipeline(options=pipeline_options) as p:
pcol = (p
| "Create" >> beam.Create(
[
{'colA': 'a1', 'colB': 'b1'},
{'colA': 'a2', 'colB': None}])
| 'ToRow' >> beam.Map(
lambda x: RowSchema(**x)) \
.with_output_types(RowSchema)
| beam.Map(print)
)
Full stack trace:
Traceback (most recent call last):
File "/usr/lib/python3.9/runpy.py", line 197, in _run_module_as_main
return _run_code(code, main_globals, None,
File "/usr/lib/python3.9/runpy.py", line 87, in _run_code
exec(code, run_globals)
File "home/src/main.py", line 326, in <module>
run()
File "home/src/main.py", line 267, in run
| 'ToRow' >> beam.Map(lambda x: RowSchema(**x)).with_output_types(RowSchema)
File "home/lib/python3.9/site-packages/apache_beam/transforms/core.py", line 1661, in Map
pardo = FlatMap(wrapper, *args, **kwargs)
File "home/lib/python3.9/site-packages/apache_beam/transforms/core.py", line 1606, in FlatMap
pardo = ParDo(CallableWrapperDoFn(fn), *args, **kwargs)
File "home/lib/python3.9/site-packages/apache_beam/transforms/core.py", line 1217, in __init__
super().__init__(fn, *args, **kwargs)
File "home/lib/python3.9/site-packages/apache_beam/transforms/ptransform.py", line 861, in __init__
self.fn = pickler.loads(pickler.dumps(self.fn))
File "home/lib/python3.9/site-packages/apache_beam/internal/pickler.py", line 51, in loads
return desired_pickle_lib.loads(
File "home/lib/python3.9/site-packages/apache_beam/internal/dill_pickler.py", line 289, in loads
return dill.loads(s)
File "home/lib/python3.9/site-packages/dill/_dill.py", line 275, in loads
return load(file, ignore, **kwds)
File "home/lib/python3.9/site-packages/dill/_dill.py", line 270, in load
return Unpickler(file, ignore=ignore, **kwds).load()
File "home/lib/python3.9/site-packages/dill/_dill.py", line 472, in load
obj = StockUnpickler.load(self)
File "home/lib/python3.9/site-packages/dill/_dill.py", line 788, in _create_namedtuple
t = collections.namedtuple(name, fieldnames)
File "/usr/lib/python3.9/collections/__init__.py", line 390, in namedtuple
raise ValueError('Type names and field names must be valid '
ValueError: Type names and field names must be valid identifiers: 'run.<locals>.RowSchema'
The failed attempt works if I change the schema definition to
RowSchema = typing.NamedTuple('RowSchema', [('colA', str), ('colB', typing.Optional[str])])
The failing snippet seems to be correctly formatted according to some of the references below.
References:
Apache Beam infer schema using NamedTuple (Python)
https://beam.apache.org/documentation/programming-guide/#inferring-schemas
https://github.com/apache/beam/blob/master/sdks/python/apache_beam/examples/wordcount_xlang_sql.py
https://github.com/apache/beam/blob/master/sdks/python/apache_beam/examples/sql_taxi.py
Tested on Python 3.9, Beam 2.37.0, and multiple runners including DirectRunner, DataflowRunner and PortableRunner.
Solved it by simply moving the schema definition outside the run function.
class RowSchema(typing.NamedTuple):
colA: str
colB: typing.Optional[str]
coders.registry.register_coder(RowSchema, coders.RowCoder)
def run(argv=None, save_main_session=True):
...
with beam.Pipeline(options=pipeline_options) as p:
...

What type do I need to pass to multiprocessing.Value for tables object

I have a table object that I want to pass to multiple threads. I use the multiprocessing.Value function to create a semaphore for that object. However, it tells me that Float32Atom is not hashable. What should I do in this case?
>>> import tables as tb
>>> f = tb.open_file('dot.h5', 'w')
>>> filters = tb.Filters(complevel=5, complib='blosc')
>>> n_ = 10000
>>> W_hat = f.create_carray(f.root, 'data', tb.Float32Atom(), shape=(n_, n_), filters=filters)
>>> W_hat = Value(tb.Float32Atom(), W_hat)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/home/lib/python3.8/multiprocessing/context.py", line 135, in Value
return Value(typecode_or_type, *args, lock=lock,
File "/home/lib/python3.8/multiprocessing/sharedctypes.py", line 74, in Value
obj = RawValue(typecode_or_type, *args)
File "/home/lib/python3.8/multiprocessing/sharedctypes.py", line 48, in RawValue
type_ = typecode_to_type.get(typecode_or_type, typecode_or_type)
TypeError: unhashable type: 'Float32Atom'
If it is correct that you have only threads (and no processes), you can just use multiprocessing.Semaphore. All threads run in the same context, so you can use it for all of them.
see https://docs.python.org/3/library/threading.html#semaphore-objects

Dialogflow v2 Beta 1 Update Intent with Python

I am lost.
I have a currently existing Intent in my project, and I am trying to update ALL fields programmatically because that is what my project requires.
I read this documentation and checked this source file on GitHub, and I think the reason I am getting an error is because I don't understand this part of the source:
Args:
intent (Union[dict, ~google.cloud.dialogflow_v2beta1.types.Intent]): Required. The intent
to update.
Format: projects/<Project ID>/agent/intents/<Intent ID>.
If a dict is provided, it must be of the same form as the protobuf
message :class:~google.cloud.dialogflow_v2beta1.types.Intent
(Line 484 for reference)
The platform works great; I just don't know what I am missing here.
My code
from constants import *
from google.oauth2 import service_account
import dialogflow_v2beta1
cred = service_account.Credentials.from_service_account_file(AUTH_JSON)
client = dialogflow_v2beta1.IntentsClient(credentials=cred)
params = dialogflow_v2beta1.types.Intent.Parameter(name='test', display_name='test', value='test', is_list=True)
t = dialogflow_v2beta1.types.Intent.Message.Text(text='TEST TEXT')
m = dialogflow_v2beta1.types.Intent.Message(text=t)
p = dialogflow_v2beta1.types.Intent.TrainingPhrase.Part(text='test',entity_type='#test_type', alias='test_alias', user_defined=True)
t = dialogflow_v2beta1.types.Intent.TrainingPhrase(name='test',type=2, parts=[p])
modified_intent = dialogflow_v2beta1.types.Intent(
display_name='test',
messages=[m],
webhook_state=1,
is_fallback=False,
ml_disabled=False,
input_context_names=PROJECT_DIR+'agent/sessions/-/contexts/' + 'TEST_CONTEXT',
events='TESTING EVENT',
training_phrases=[t],
action='TESTING ACTION',
reset_contexts=False,
parameters=[params]
)
name = client.intent_path(PROJECT_NAME, '7b8f2105-53d4-4724-8d4c-0170b8db7028')
intent = client.get_intent(name)
client.update_intent(intent=modified_intent, language_code=LANGUAGE_CODE, intent_view=0)
Full error message
Traceback (most recent call last):
File "/anaconda/envs/data/lib/python3.6/site-packages/google/api_core/grpc_helpers.py", line 57, in error_remapped_callable
return callable_(*args, **kwargs)
File "/anaconda/envs/data/lib/python3.6/site-packages/grpc/_channel.py", line 550, in __call__
return _end_unary_response_blocking(state, call, False, None)
File "/anaconda/envs/data/lib/python3.6/site-packages/grpc/_channel.py", line 467, in _end_unary_response_blocking
raise _Rendezvous(state, None, None, deadline)
grpc._channel._Rendezvous: <_Rendezvous of RPC that terminated with:
status = StatusCode.INVALID_ARGUMENT
details = "Resource name '' does not match 'projects/*/agent/intents/*'."
debug_error_string = "{"created":"#1552461629.958860000","description":"Error received from peer","file":"src/core/lib/surface/call.cc","file_line":1036,"grpc_message":"Resource name '' does not match 'projects/*/agent/intents/*'.","grpc_status":3}"
>
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "test.py", line 26, in <module>
client.update_intent(intent=modified_intent, language_code=LANGUAGE_CODE, intent_view=0)
File "/anaconda/envs/data/lib/python3.6/site-packages/dialogflow_v2beta1/gapic/intents_client.py", line 535, in update_intent
request, retry=retry, timeout=timeout, metadata=metadata)
File "/anaconda/envs/data/lib/python3.6/site-packages/google/api_core/gapic_v1/method.py", line 143, in __call__
return wrapped_func(*args, **kwargs)
File "/anaconda/envs/data/lib/python3.6/site-packages/google/api_core/retry.py", line 270, in retry_wrapped_func
on_error=on_error,
File "/anaconda/envs/data/lib/python3.6/site-packages/google/api_core/retry.py", line 179, in retry_target
return target()
File "/anaconda/envs/data/lib/python3.6/site-packages/google/api_core/timeout.py", line 214, in func_with_timeout
return func(*args, **kwargs)
File "/anaconda/envs/data/lib/python3.6/site-packages/google/api_core/grpc_helpers.py", line 59, in error_remapped_callable
six.raise_from(exceptions.from_grpc_error(exc), exc)
File "<string>", line 3, in raise_from
google.api_core.exceptions.InvalidArgument: 400 Resource name '' does not match 'projects/*/agent/intents/*'.
You are able to get the intent which you want to modify correctly by using
name = client.intent_path(PROJECT_NAME, your_intent_id)
You will get the complete definition of your intent.
Then you need to change the values of this intent by accessing them and assigning your values.
After that, you need to pass the same intent in your update_intent() function.
It is also advised to use update_mask to avoid changing any other fields or setting the rest of the fields to None.
Here is an example of updating intent display_name from greet to hello:
client = dialogflow.IntentsClient()
intent_name = client.intent_path(project_id, intent_id)
intent = client.get_intent(intent_name, intent_view=dialogflow.enums.IntentView.INTENT_VIEW_FULL)
intent.display_name = 'hello'
update_mask = field_mask_pb2.FieldMask(paths=['display_name'])
print(response)
You will need one extra import as well in your code:
from google.protobuf import field_mask_pb2
This way, intent's display_name will be changed.
You can do the same for the rest of your properties as well. Just remember to pass the value which the property is expecting by following this documentation, and you can take help from this issue as well.
Hope it helps.

How to split strings from a CSV column into a list?

I want to make a list out of the words of a CSV column called 'text' that is composed of strings
This is what I have:
def html_words():
legits_text = pd.read_csv('/Users/pmpilla/Documents/phishing/html_text.csv', delimiter=',')
list_text = legits_text["text"].split(" ")
This is the error that I am getting:
> Traceback (most recent call last):
File "/Users/pmpilla/Documents/phishing/html_words/legit_path_words.py", line 70, in <module>
html_words()
File "/Users/pmpilla/Documents/phishing/html_words/legit_path_words.py", line 30, in html_words
list_text = legits_text["text"].split(" ")
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pandas/core/generic.py", line 3614, in __getattr__
return object.__getattribute__(self, name)
AttributeError: 'Series' object has no attribute 'split'
What you might need to do is:
list_text = legits_text["text"].str.split(" ")
You might also need to use the parameter expand=True to create new columns instead of a list.
Refer:
https://pandas.pydata.org/pandas-docs/stable/generated/pandas.Series.str.split.html

exchangelib get task from task queryset MIME conversion is not supported for this item

I am trying to access an object through a queryset created with exchangelib; however, I get the error "MIME conversion is not supported for this item", and I don't know what it means. I have tried the same code with calendar items and had no problem whatsoever. Thanks.
from exchangelib import Account, Credentials, DELEGATE
credentials = Credentials(username='BUREAU\\pepe', password='pepe')
pepe_account = Account(
primary_smtp_address='pepe@office.com',
credentials=credentials,
autodiscover=True,
access_type=DELEGATE)
tasks = pepe_account.tasks.filter()
print(tasks) -- Works
for task in tasks:
print(task)
The iteration fails. I also tried replacing print(task) with pass, and I get the same message.
Traceback (most recent call last):
File "test.py", line 13, in <module>
for task in tasks:
File "/home/pepe/.local/lib/python3.5/site-packages/exchangelib/queryset.py", line 197, in __iter__
for val in result_formatter(self._query()):
File "/home/pepe/.local/lib/python3.5/site-packages/exchangelib/queryset.py", line 272, in _as_items
for i in iterable:
File "/home/pepe/.local/lib/python3.5/site-packages/exchangelib/account.py", line 393, in fetch
for i in GetItem(account=self).call(items=ids, additional_fields=additional_fields, shape=IdOnly):
File "/home/pepe/.local/lib/python3.5/site-packages/exchangelib/services.py", line 456, in _pool_requests
for elem in elems:
File "/home/pepe/.local/lib/python3.5/site-packages/exchangelib/services.py", line 283, in _get_elements_in_response
container_or_exc = self._get_element_container(message=msg, name=self.element_container_name)
File "/home/pepe/.local/lib/python3.5/site-packages/exchangelib/services.py", line 256, in _get_element_container
self._raise_errors(code=response_code, text=msg_text, msg_xml=msg_xml)
File "/home/pepe/.local/lib/python3.5/site-packages/exchangelib/services.py", line 273, in _raise_errors
raise vars(errors)[code](text)
exchangelib.errors.ErrorUnsupportedMimeConversion: MIME conversion is not supported for this item type.

Resources