If condition with or in python - python-3.x

If I have a list containing "Null", an empty string and some real values, how can I write a condition that handles both the empty case and the value case? In my case, when I check "89" and "Null", the message for a real value is not printed; instead it shows "empty value can not processed". A real value should be processed, and a Null or empty value should not.
payload=[{'id': 'Room1',
'pressure': {'metadata': {}, 'type': 'Number', 'value': 'Null'},
'temperature':{'metadata': {}, 'type': 'Number', 'value': '89'},
'type': 'RoomTest'}]
attrs=['temperature','pressure']
x=(len(payload))
for i in range(x):
for j in attrs:
y=payload[i][j]['value']
print(y)
for item in y:
print(item)
if item is ["Null", ""]:
print("empty value can not processed")
if item is not["Null",""]:
print("successfully processed for value")
I have also tried using "any" but still same result.
Thanks in Advance.

You can do this
# Sample payload: one entity whose attributes each carry a 'value' string.
payload = [{
    'id': 'Room1',
    'pressure': {'metadata': {}, 'type': 'Number', 'value': 'Null'},
    'temperature': {'metadata': {}, 'type': 'Number', 'value': '89'},
    'type': 'RoomTest',
}]
# Attributes to inspect on every entity, in reporting order.
attrs = ['temperature', 'pressure']

for item in payload:
    for attr in attrs:
        value = item[attr]['value']
        # `in` compares by equality against each listed placeholder.
        # `is` would compare object identity with a brand-new list and
        # therefore never match, which is why the original check failed.
        if value in ["Null", ""]:
            print("empty value can not processed")
        else:
            print("successfully processed for value")

Related

How do I count how many documents have an empty field x inside a certain collection?

I have a problem. I have a collection named orders, and I want to check in how many documents the field phone is empty.
So how can I count the documents in the collection orders whose phone field is empty?
This my collection orders:
[
{'_id': 'orders/213123',
'contactEditor': {'name': 'Max Power',
'phone': '1234567',
'email': 'max#power.com'},
'contactSoldToParty': {'name': 'Max Not',
'phone': '123456789',
'email': 'maxnot#power.com'},
'isCompleteDelivery': False,
'metaData': {'dataOriginSystem': 'Goods',
'dataOriginWasCreatedTime': '10:12:12',},
'orderDate': '2021-02-22',
'orderDateBuyer': '2021-02-22',
},
{'_id': 'orders/12323',
'contactEditor': {'name': 'Max Power2',
'phone': '1234567',
'email': 'max#power.com'},
'contactSoldToParty': {'name': 'Max Not',
'phone': '123456789',
'email': 'maxnot#power.com'},
'isCompleteDelivery': False,
'metaData': {'dataOriginSystem': 'Goods',
'dataOriginWasCreatedTime': '10:12:12',},
'orderDate': '2021-02-22',
'orderDateBuyer': '2021-02-22',
},
]
If you're trying to FILTER for a value that is null...
FOR o IN orders
FILTER o.contactSoldToParty.phone == null
RETURN o
But if you just want a simple count, then I would use COLLECT (see the docs)...
FOR o IN orders
COLLECT hasPhone = (o.contactSoldToParty.phone != null) WITH COUNT INTO total
RETURN { hasPhone, total }
There are two caveats, both related to how your document is structured:
you may have to first check if the contactSoldToParty attribute exists (or use nullish coalescing)
be sure that the phone attribute is indeed empty/missing/null - null is not the same as an empty string ('')

Filtering a list of nested dictionary

I am receiving the following response in a list of nested dictionaries format:
list_of_dicts = [{
'id': '11593636317',
'properties': {
'created_date': '2021-09-28T16:16:31.635Z',
'modified_date': '2021-09-28T16:16:31.635Z',
'note': 'Test Note 123',
'id': '11593636317'},
'created_date': '2021-09-28T16:16:31.635Z',
'updated_date': '2021-09-28T16:16:31.635Z',
'archived': False
},
{
'id': '11593636318',
'properties': {
'created_date': '2021-09-28T16:16:31.635Z',
'modified_date': '2021-09-28T16:16:31.635Z',
'note': 'Ticket Note',
'id': '11593636318'},
'created_date': '2021-09-28T16:16:31.635Z',
'updated_date': '2021-09-28T16:16:31.635Z',
'archived': False
}
]
However, I don't need all of the records for a specific action. For that, I am trying to keep only the records whose note field starts with the word Ticket.
For that I tried:
filtered_notes = []
for note in list_of_dicts:
if note['properties']['note'].startswith('Ticket'):
filtered_notes.append(note['id'])
Unfortunately, I am running into the following error and I have no clue how to get around it:
AttributeError: 'NoneType' object has no attribute 'startswith'
You can do:
# Collect the ids of all records whose note text starts with 'Ticket'.
filtered_notes = []
for note in list_of_dicts:
    try:
        if note['properties']['note'].startswith('Ticket'):
            filtered_notes.append(note['id'])
    except (KeyError, AttributeError, TypeError):
        # Deliberate best-effort skip of malformed records:
        #   KeyError       - 'properties', 'note' or 'id' is missing
        #   AttributeError - the note is None / not a string
        #   TypeError      - 'properties' itself is None / not a mapping
        pass
The try/except block will protect you in case some of the needed keys are missing or the note property value has an unexpected type (for example None).

how to define bigquery schema when build apache beam data pipeline

I create a data pipeline with apache beam, but it can not insert the data to bigquery.
I use beam.ParDo to process the data, and yield the data row by row,
below is the code.
project = 'project_name'
dataset = 'XXX'
class parser_data(beam.DoFn):
def process(self, data):
ZZ = [{"NN":d["NNN"], "descrip":d} for d in data["colZ"]]
ret = pd.DataFrame(data['colD'])
ret["colA"] = data["colA"]
ret["colB"] = data["colB"]
ret["colC"] = data["colC"]
ret = pd.merge(ret, pd.DataFrame(ZZ), on=["NN"], how="left")
ret = ret[["colA", "colB", "colC", "NN", "sample", "descrip"]]
print(ret)
ret_dict = ret.to_dict("records")
print(ret_dict)
for i in range(len(ret_dict)):
yield ret_dict[i]
options = PipelineOptions(
runner = 'DirectRunner',
region = 'us-west1',
project = project,
job_name = "test-tmp",
streaming = False,
setup_file = './setup.py',
subnetwork = "XXXXXXX",
service_account_email = "XXXXXX",
temp_location='XXXXXX',
staging_location="XXXXXX",
use_public_ips = False
)
d = {
'colA': '1',
'colB': 'Strawberry',
'colC': 2,
'colD': [{"NN":"AA", "sample":1}, {"NN":"AA", "sample":2}, {"NN":"BB", "sample":3}, {"NN":"CC", "sample":4}, {"NN":"CC", "sample":5}],
'colZ': [{"NNN":"AA", "name":"123", "timeperiod":"152"}, {"NNN":"BB", "name":"1212513", "timeperiod":"1952"}, {"NNN":"CC", "name":"13", "timeperiod":"14152"}],
}
schema = {
'fields':[
{'name': 'colA', 'type': 'STRING', 'mode': 'REQUIRED'},
{'name': 'colB', 'type': 'STRING', 'mode': 'REQUIRED'},
{'name': 'colC', 'type': 'STRING', 'mode': 'REQUIRED'},
{'name': 'NN', 'type': 'STRING', 'mode': 'REQUIRED'},
{'name': 'sample', 'type': 'STRING', 'mode': 'REQUIRED'},
{
'name': 'descrip', 'type': 'RECORD', 'mode': 'NULLABLE',
'fields':[
{"name": "NNN", "type": "STRING", 'mode': 'NULLABLE'},
{"name": "name", "type": "STRING", 'mode': 'NULLABLE'},
{"name": "timeperiod", "type": "STRING", 'mode': 'NULLABLE'},
]
},
]
}
with beam.Pipeline(options=options) as pipeline:
data = (
pipeline | 'get data' >> beam.Create([d])
)
ret_A = (
data | "Process A data " >> beam.ParDo(parser_data())
| "Insert data into BQ" >> beam.io.WriteToBigQuery(
f"{project}:{dataset}.TestJsonData",
schema=schema,
create_disposition=beam.io.BigQueryDisposition.CREATE_IF_NEEDED,
write_disposition=beam.io.BigQueryDisposition.WRITE_APPEND
)
)
The error is below
RuntimeError: BigQuery job beam_bq_job_LOAD_testtmp_LOAD_STEP_820_9672b886a985a9a36a9c3805cee3be5e_3f26019c07d746ef92c0893574156f5b failed. Error Result: <ErrorProto
location: 'gs://XXXXXXXX/dataflow_temp/bq_load/db11e8430c10470382be2565136d53fb/{project}.{dataset}.TestJsonData/39e0d645-8484-4033-a1c4-3e4a825d6fee'
message: 'Error while reading data, error message: JSON table encountered too many errors, giving up. Rows: 1; errors: 1. Please look into the errors[] collection for more details.'
reason: 'invalid'> [while running '[25]: Insert data into BQ/BigQueryBatchFileLoads/WaitForDestinationLoadJobs']
Also the print function show the data, so I think the problem is in the bigquery schema, but I can not find it
Anyone have any idea?

Python dictionary, get values by key name

I have a nested dictionary and I am trying to iterate over it and get the values by key.
I have a payload which has route as its main node. Inside route I have many waypoints; I would like to iterate over all waypoints and set the values, based on key name, into a protobuf variable.
sample code below:
'payload':
{
'route':
{
'name': 'Argo',
'navigation_type': 2,
'backtracking': False,
'continuous': False,
'waypoints':
{
'id': 2,
'coordinate':
{
'type': 0,
'x': 51.435989,
'y': 25.32838,
'z': 0
},
'velocity': 0.55555582,
'constrained': True,
'action':
{
'type': 1,
'duration': 0
}
}
'waypoints':
{
'id': 2,
'coordinate':
{
'type': 0,
'x': 51.435989,
'y': 25.32838,
'z': 0
},
'velocity': 0.55555582,
'constrained': True,
'action':
{
'type': 1,
'duration': 0
}
}
},
'waypoint_status_list':
{
'id': 1,
'status': 'executing'
},
'autonomy_status': 3
},
#method to iterate over payload
def get_encoded_payload(self, payload):
#1 fill route proto from payload
a = payload["route"]["name"] #working fine
b = payload["route"]["navigation_type"] #working fine
c = payload["route"]["backtracking"] #working fine
d = payload["route"]["continuous"] #working fine
self.logger.debug(type(payload["route"]["waypoints"])) # type is dict
#iterate over waypoints
for waypoint in payload["route"]["waypoints"]:
wp_id = waypoint["id"] # Error, string indices must be integer
i would like to iterate over all waypoints and set the value of each key value to a variable
self.logger.debug(type(payload["route"]["waypoints"])) # type is dict
Iterating over a dict gives you its keys. Your later code seems to be expecting multiple waypoints as a list of dicts, which would work, but that's not what your structure actually contains.
Try print(waypoint) and see what you get.

How to add new key in the existing dictionary and derive a nested dictionary from it in python?

I am trying to add a new key inside the existing dictionary to create a new nested dictionary.
Below is the existing dictionary
I need to make a nested dictionary from the below dictionary
{'userId': 'thanks',
'jobTitleName': 'Program Directory',
'firstName': 'Tom', 'lastName': 'Hanks',
'preferredFullName': 'Tom Hanks',
'employeeCode': 'E3',
'region': 'CA',
'phoneNumber': '+00408-2222222',
'emailAddress': 'tomhanks#gmail.com',
'Full Name': 'TomHanks'}
This is what i tried:
key1=['userId','jobTitleName','firstName','lastName','employeeCode']
key2=['Full Name','phoneNumber','region','emailAddress']
jsonValue={
{'userId': 'thanks',
'jobTitleName': 'Program Directory',
'firstName': 'Tom', 'lastName': 'Hanks',
'preferredFullName': 'Tom Hanks',
'employeeCode': 'E3',
'region': 'CA',
'phoneNumber': '+00408-2222222',
'emailAddress': 'tomhanks#gmail.com',
'Full Name': 'TomHanks'}
}
empDetails={}
for k in key1:
empDetails[k]=jsonValue[k]
print("Key1", empDetails)
for k2 in key2:
empDetails['otherDetails'][k2]=jsonValue[k2]
But its not working
Expected:
Now i need to add new key as 'otherDetails' to derive a nested dictionary as follows
{'userId': 'thanks',
'jobTitleName': 'Program Directory',
'firstName': 'Tom', 'lastName': 'Hanks',
'preferredFullName': 'Tom Hanks',
'employeeCode': 'E3',
otherDetails{
'region': 'CA',
'phoneNumber': '+00408-2222222',
'emailAddress': 'tomhanks#gmail.com',
'Full Name': 'TomHanks'
}
}
Appreciate if anyone can give right solution?
Thanks
There are a couple of problems in your code. First, in your jsonValue you put a dict inside another pair of braces without specifying a key (Python parses `{ {...} }` as a set literal, and a dict is unhashable, so this fails). From context I assume you actually want to use an array here (since you most likely have an array of employee data; if I'm wrong here, just comment).
Then you try to assign to empDetails['otherDetails'][k2]; however, you never initialize the dict in empDetails['otherDetails']. Because of this, the lookup empDetails['otherDetails'] raises a KeyError before the assignment to [k2] can happen.
# Keys copied to the top level of the result.
key1 = ['userId', 'jobTitleName', 'firstName', 'lastName', 'employeeCode']
# Keys moved into the nested 'otherDetails' dict.
key2 = ['Full Name', 'phoneNumber', 'region', 'emailAddress']

# A list of employee records (one record in this example).
jsonValue = [
    {
        'userId': 'thanks',
        'jobTitleName': 'Program Directory',
        'firstName': 'Tom', 'lastName': 'Hanks',
        'preferredFullName': 'Tom Hanks',
        'employeeCode': 'E3',
        'region': 'CA',
        'phoneNumber': '+00408-2222222',
        'emailAddress': 'tomhanks#gmail.com',
        'Full Name': 'TomHanks',
    },
]

for employee in jsonValue:
    # The nested dict must exist before items can be assigned into it;
    # otherwise empDetails['otherDetails'] raises KeyError.
    empDetails = {'otherDetails': {}}
    for k in key1:
        empDetails[k] = employee[k]
    print("Key1", empDetails)
    for k2 in key2:
        empDetails['otherDetails'][k2] = employee[k2]
    print("Key1", empDetails)
Filter out which keys you want to keep, then filter out the keys you want to move to the inner dict, then insert the inner dict.
from pprint import pprint

# Flat source record that will be reshaped.
d = {
    "userId": "thanks",
    "jobTitleName": "Program Directory",
    "firstName": "Tom",
    "lastName": "Hanks",
    "preferredFullName": "Tom Hanks",
    "employeeCode": "E3",
    "region": "CA",
    "phoneNumber": "+00408-2222222",
    "emailAddress": "tomhanks#gmail.com",
    "Full Name": "TomHanks",
}

# Names that stay at the top level of the result.
keys_to_keep = {
    'userId', 'jobTitleName', 'firstName', 'lastName',
    'preferredFullName', 'employeeCode',
}
# Names that are relocated into the nested dict.
keys_to_move = {'Full Name', 'phoneNumber', 'region', 'emailAddress'}

# Build the nested part and the outer part independently ...
insert_dict = {name: d[name] for name in keys_to_move}
new_dict = {name: d[name] for name in keys_to_keep}
# ... then attach the nested part under its new key.
new_dict.update(otherDetails=insert_dict)

pprint(new_dict)
Output:
{'employeeCode': 'E3',
'firstName': 'Tom',
'jobTitleName': 'Program Directory',
'lastName': 'Hanks',
'userId': 'thanks',
'preferredFullName': 'Tom Hanks',
'otherDetails': {'Full Name': 'TomHanks',
'emailAddress': 'tomhanks#gmail.com',
'phoneNumber': '+00408-2222222',
'region': 'CA'},
}

Resources