How do I find the specific document insert_many() fails on? - python-3.x

if (constant.gc in file.sheet_names):
coll = db[constant.gc]
print("Adding to " + constant.gc + " database")
df = file.parse(constant.gc)
df = clean(df)
data_dict = df.to_dict('r')
try:
result = coll.insert_many(data_dict)
nr_inserts = len(result.inserted_ids)
print(str(nr_inserts) + "Cases added to database")
except pymongo.errors.BulkWriteError as bwe:
nr_inserts = bwe.details["nInserted"]
print(nr_inserts)
I keep getting a NaTType error and I can't find which row of the dataframe has the blank date. Unfortunately, the data comes from a 39k-row Excel file, so just looking through it isn't going to help. I tried an except clause that, in theory, would tell me how many documents were successfully inserted before the error, and therefore give me a hint about where to look, but it never prints anything.
The error looks like this:
Exception in Tkinter callback
Traceback (most recent call last):
File "C:\Python38\lib\tkinter\__init__.py", line 1883, in __call__
return self.func(*args)
File "dataimport.py", line 71, in importFromExcel
result = coll.insert_many(data_dict)
File "C:\Python38\lib\site-packages\pymongo\collection.py", line 758, in insert_many
blk.execute(write_concern, session=session)
File "C:\Python38\lib\site-packages\pymongo\bulk.py", line 511, in execute
return self.execute_command(generator, write_concern, session)
File "C:\Python38\lib\site-packages\pymongo\bulk.py", line 345, in execute_command
client._retry_with_session(
File "C:\Python38\lib\site-packages\pymongo\mongo_client.py", line 1384, in _retry_with_session
return func(session, sock_info, retryable)
File "C:\Python38\lib\site-packages\pymongo\bulk.py", line 339, in retryable_bulk
self._execute_command(
File "C:\Python38\lib\site-packages\pymongo\bulk.py", line 295, in _execute_command
result, to_send = bwc.execute(ops, client)
File "C:\Python38\lib\site-packages\pymongo\message.py", line 898, in execute
request_id, msg, to_send = self._batch_command(docs)
File "C:\Python38\lib\site-packages\pymongo\message.py", line 890, in _batch_command
request_id, msg, to_send = _do_bulk_write_command(
File "C:\Python38\lib\site-packages\pymongo\message.py", line 1382, in _do_bulk_write_command
return _do_batched_op_msg(
File "C:\Python38\lib\site-packages\pymongo\message.py", line 1307, in _do_batched_op_msg
return _batched_op_msg(
File "pandas\_libs\tslibs\nattype.pyx", line 64, in pandas._libs.tslibs.nattype._make_error_func.f
ValueError: NaTType does not support utcoffset
At a guess, ValueError and BulkWriteError are not the same, so nInserted never prints. Does anyone have an idea of how to get the number of successful inserts before the failure?

I doubt that any inserts are performed, as the error is likely occurring before the data is passed to MongoDB to insert.
In any case, if you want to hunt down which row in the dataframe has the NaT value, try the following (substitute the name of your date column for 'date'):
null_df = df[pd.isnull(df['date'])]
print(null_df)
To remove null dated items use:
df = df[pd.notnull(df['date'])]

Related

How do I search for a Django model by a primary key that doesn't match its type without throwing an error?

I'm using Django 3 and Python 3.7. I have a model (MySql 8 backed table) that has integer primary keys. I have code that searches for such models like so
state = State.objects.get(pk=locality['state'])
The issue is if "locality['state']" contains an empty string, I get the below error
Traceback (most recent call last):
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/django/db/models/fields/__init__.py", line 1768, in get_prep_value
return int(value)
ValueError: invalid literal for int() with base 10: ''
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/Users/davea/Documents/workspace/chicommons/maps/web/tests/test_serializers.py", line 132, in test_coop_create_with_incomplete_data
assert not serializer.is_valid(), serializer.errors
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/rest_framework/serializers.py", line 234, in is_valid
self._validated_data = self.run_validation(self.initial_data)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/rest_framework/serializers.py", line 433, in run_validation
value = self.to_internal_value(data)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/rest_framework/serializers.py", line 490, in to_internal_value
validated_value = field.run_validation(primitive_value)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/rest_framework/fields.py", line 565, in run_validation
value = self.to_internal_value(data)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/rest_framework/relations.py", line 519, in to_internal_value
return [
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/rest_framework/relations.py", line 520, in <listcomp>
self.child_relation.to_internal_value(item)
File "/Users/davea/Documents/workspace/chicommons/maps/web/directory/serializers.py", line 26, in to_internal_value
state = State.objects.get(pk=locality['state'])
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/django/db/models/manager.py", line 82, in manager_method
return getattr(self.get_queryset(), name)(*args, **kwargs)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/django/db/models/query.py", line 404, in get
clone = self._chain() if self.query.combinator else self.filter(*args, **kwargs)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/django/db/models/query.py", line 904, in filter
return self._filter_or_exclude(False, *args, **kwargs)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/django/db/models/query.py", line 923, in _filter_or_exclude
clone.query.add_q(Q(*args, **kwargs))
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/django/db/models/sql/query.py", line 1337, in add_q
clause, _ = self._add_q(q_object, self.used_aliases)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/django/db/models/sql/query.py", line 1362, in _add_q
child_clause, needed_inner = self.build_filter(
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/django/db/models/sql/query.py", line 1298, in build_filter
condition = self.build_lookup(lookups, col, value)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/django/db/models/sql/query.py", line 1155, in build_lookup
lookup = lookup_class(lhs, rhs)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/django/db/models/lookups.py", line 22, in __init__
self.rhs = self.get_prep_lookup()
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/django/db/models/lookups.py", line 72, in get_prep_lookup
return self.lhs.output_field.get_prep_value(self.rhs)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/django/db/models/fields/__init__.py", line 1770, in get_prep_value
raise e.__class__(
ValueError: Field 'id' expected a number but got ''.
Is there a more "Django" way to search for an object without an error being thrown if the object doesn't exist? I could do this
state = None if str(type(locality['state'])) != "<class 'int'>" else State.objects.get(pk=locality['state'])
but this seems unnecessarily wordy and not how Django was intended to be used.
I would choose the "ask forgiveness, not permission" (EAFP) strategy:
# EAFP: attempt the lookup and treat every expected failure as "no state".
try:
    state = State.objects.get(pk=int(locality['state']))
except (TypeError, ValueError, State.DoesNotExist):
    # TypeError: the value is None; ValueError: '' or other non-numeric text;
    # State.DoesNotExist: a well-formed key that matches no row.
    state = None
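You could use a logical AND to validate the dict value before using it to look up the data. Note that when the value is falsy (for example, an empty string), `state` is assigned that falsy value itself rather than None.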
state = locality['state'] and State.objects.get(pk=locality['state'])

Python threading causing issues with google api

I'm running through a list of locations and trying to find places along my route. This is my first attempt at threading, so any tips would be appreciated! When I run this, it works fine for the first few iterations, but then I start getting a KeyError and the API response says the route was not found (even though it should be). If I search along a shorter route, everything runs fine. When I extend the route past a couple of hours of drive time, I start getting these errors. Is it possible that I'm overloading the API, or does my code look off?
import pandas as pd
from threading import Thread
import threading
import requests
start_input = input("start: ")
end_input = input("end: ")
out_way = input("out of the way: ")
out_way_secs = int(out_way) * 60
thread_local = threading.local()
def get_session():
if not getattr(thread_local, "session", None):
thread_local.session = requests.Session()
return thread_local.session
def get_routes(url, start, end, waypoint, idx):
session = get_session()
with session.get(url, params={'origins': f'{start}|{waypoint}', 'destinations': f'{start}|{end}',
'key': '# key'}) as response:
route = response.json()
if route['rows'][1]['elements'][0]['status'] != 'OK':
results[idx] = {'# info'}
else:
nonstop_route = route['rows'][0]['elements'][1]['duration']['value']
leg1 = route['rows'][1]['elements'][0]['duration']['value']
leg2 = route['rows'][1]['elements'][1]['duration']['value']
time_added = (leg1 + leg2) - nonstop_route
time_added_mins = str(datetime.timedelta(seconds=(leg1 + leg2) - nonstop_route))
more_time = time_added_mins.split(':')
added_time_str = str(f'{more_time[0]}:{more_time[1]}:{more_time[2]} away!')
if time_added < allowable_time:
results[idx] = {# info to return}
return results[idx]
if __name__ == "__main__":
start_time = time.time()
output_df = pd.DataFrame(columns=['Location', 'Added Time', 'Notes'])
threads = [None] * coords[0]
results = [None] * coords[0]
for i in range(len(threads)):
threads[i] = Thread(target=get_routes, args=('https://maps.googleapis.com/maps/api/distancematrix/json',
start_input, end_input, stops[i], i))
threads[i].start()
for i in range(len(threads)):
threads[i].join()
for x in range(len(results)):
output_df = output_df.append(results[x], ignore_index=True)
output_df = output_df.sort_values(['Added Time'], ascending=True)
output_df.to_csv('output.csv', index=False)
There are three errors that it produces: the first one appears by itself, and the last two come together. The code is the same each time I run it, so I'm not sure why I'm getting different errors.
This is the most common error that comes by itself (the routing duration works fine when run individually):
Exception in thread Thread-171:
Traceback (most recent call last):
File "C:\Python37-32\lib\threading.py", line 917, in _bootstrap_inner
self.run()
File "C:\Python37-32\lib\threading.py", line 865, in run
self._target(*self._args, **self._kwargs)
File "C:program.py", line 46, in get_routes
nonstop_route = route['rows'][0]['elements'][1]['duration']['value']
KeyError: 'duration'
The two below I get together and are less common:
Exception in thread Thread-436:
Traceback (most recent call last):
File "C:\Python37-32\lib\threading.py", line 917, in _bootstrap_inner
self.run()
File "C:\Python37-32\lib\threading.py", line 865, in run
self._target(*self._args, **self._kwargs)
File "C:/program.py", line 40, in get_routes
route = response.json()
File "C:\requests\models.py", line 897, in json
return complexjson.loads(self.text, **kwargs)
File "C:\Python37-32\lib\json\__init__.py", line 348, in loads
return _default_decoder.decode(s)
File "C:\Python37-32\lib\json\decoder.py", line 337, in decode
obj, end = self.raw_decode(s, idx=_w(s, 0).end())
File "C:\Python37-32\lib\json\decoder.py", line 355, in raw_decode
raise JSONDecodeError("Expecting value", s, err.value) from None
json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)
second error:
Exception in thread Thread-196:
Traceback (most recent call last):
File "C:\site-packages\urllib3\response.py", line 360, in _error_catcher
yield
File "C:\urllib3\response.py", line 442, in read
data = self._fp.read(amt)
File "C:\Python37-32\lib\http\client.py", line 447, in read
n = self.readinto(b)
File "C:\Python37-32\lib\http\client.py", line 491, in readinto
n = self.fp.readinto(b)
File "C:\Python37-32\lib\socket.py", line 589, in readinto
return self._sock.recv_into(b)
File "C:\Python37-32\lib\ssl.py", line 1052, in recv_into
return self.read(nbytes, buffer)
File "C:\Python37-32\lib\ssl.py", line 911, in read
return self._sslobj.read(len, buffer)
ConnectionAbortedError: [WinError 10053] An established connection was aborted by the software in your host machine
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\site-packages\requests\models.py", line 750, in generate
for chunk in self.raw.stream(chunk_size, decode_content=True):
File "C:\site-packages\urllib3\response.py", line 494, in stream
data = self.read(amt=amt, decode_content=decode_content)
File "C:\site-packages\urllib3\response.py", line 459, in read
raise IncompleteRead(self._fp_bytes_read, self.length_remaining)
File "C:\Python37-32\lib\contextlib.py", line 130, in __exit__
self.gen.throw(type, value, traceback)
File "C:\site-packages\urllib3\response.py", line 378, in _error_catcher
raise ProtocolError('Connection broken: %r' % e, e)
urllib3.exceptions.ProtocolError: ("Connection broken: ConnectionAbortedError(10053, 'An established connection was aborted by the software in your host machine', None, 10053, None)", ConnectionAbortedError(10053, 'An established connection was aborted by the software in your host machine', None, 10053, None))

Pygsheets update_cells() not working

I am trying to update a range of 3 cells horizontally using pygsheets but I am having some issues
The Error:
Traceback (most recent call last):
File "C:\Users\Art\Desktop\Python\Plain\General_testing.py", line 12, in <module>
wks.update_cells('I{0}:K{0}'.format(rows),[output['Name'], output['Age'], output['State']])
File "C:\Users\Art\AppData\Local\Programs\Python\Python36\lib\site-packages\pygsheets\worksheet.py", line 431, in update_cells
self.client.sh_update_range(self.spreadsheet.id, body, self.spreadsheet.batch_mode, parse=parse)
File "C:\Users\Art\AppData\Local\Programs\Python\Python36\lib\site-packages\pygsheets\client.py", line 352, in sh_update_range
self._execute_request(spreadsheet_id, final_request, batch)
File "C:\Users\Art\AppData\Local\Programs\Python\Python36\lib\site-packages\pygsheets\client.py", line 418, in _execute_request
response = request.execute()
File "C:\Users\Art\AppData\Local\Programs\Python\Python36\lib\site-packages\oauth2client\_helpers.py", line 133, in positional_wrapper
return wrapped(*args, **kwargs)
File "C:\Users\Art\AppData\Local\Programs\Python\Python36\lib\site-packages\googleapiclient\http.py", line 842, in execute
raise HttpError(resp, content, uri=self.uri)
googleapiclient.errors.HttpError: <HttpError 400 when requesting https://sheets.googleapis.com/v4/spreadsheets/1P2FvtuOLWZGgVIp-D9ROeBqexXQt1eNv8tSxZdwGrlw/values/Sheet1%21I19%3AK19?valueInputOption=USER_ENTERED&alt=json returned "Invalid value at 'data.values[0]' (type.googleapis.com/google.protobuf.ListValue), "Joe"
Invalid value at 'data.values[1]' (type.googleapis.com/google.protobuf.ListValue), "19"
Invalid value at 'data.values[2]' (type.googleapis.com/google.protobuf.ListValue), "NY"">
Simplified version of my code that produces the same error:
import pygsheets
gc = pygsheets.authorize(service_file='secret.json')
sh = gc.open('TestSheet')
wks = sh.sheet1
row = 1
output = {}
output['Name'] = 'Joe'
output['Age'] = '19'
output['State'] = 'NY'
wks.update_cells('I{0}:K{0}'.format(row),[output['Name'], output['Age'], output['State']])
`update_cells` takes a 2D matrix (a list of lists) as its values argument, not a flat list:
wks.update_cells('I{0}:K{0}'.format(row),[[output['Name'], output['Age'], output['State']] ] )
see the docs here

pymysql - python 3.6 on AWS Lambda - handling inserting data into SQL statements

I have spent hours searching for how to insert strings and decimals into a SQL INSERT statement, and I cannot find an answer that works. I am using AWS Lambda with Python 3.6 to process data. I keep having issues getting the values of my variables inserted into the SQL statement, and I am getting the error message shown below.
My code:
timeStamp = 123456789.123456
thing = "Testing/IoT"
statement = "INSERT INTO `cycles` (`timeStamp`, `thing`) VALUES ({}, {})"
theData = (timeStamp, thing)
logger.info(statement, theData)
cursor = conn.cursor()
cursor.execute(statement, theData)
conn.commit()
Error message:
not all arguments converted during string formatting: TypeError
Traceback (most recent call last): File
"/var/task/recordCyclesRDS.py", line 56, in handler
logger.info(statement, theData) File
"/var/lang/lib/python3.6/logging/init.py", line 1306, in info
self._log(INFO, msg, args, **kwargs) File
"/var/lang/lib/python3.6/logging/init.py", line 1442, in _log
self.handle(record) File
"/var/lang/lib/python3.6/logging/init.py", line 1452, in handle
self.callHandlers(record) File
"/var/lang/lib/python3.6/logging/init.py", line 1514, in
callHandlers hdlr.handle(record) File
"/var/lang/lib/python3.6/logging/init.py", line 863, in handle
self.emit(record) File "/var/runtime/awslambda/bootstrap.py", line
442, in emit msg = self.format(record) File
"/var/lang/lib/python3.6/logging/init.py", line 838, in format
return fmt.format(record) File
"/var/lang/lib/python3.6/logging/init.py", line 575, in format
record.message = record.getMessage() File
"/var/lang/lib/python3.6/logging/init.py", line 338, in getMessage
msg = msg % self.args TypeError: not all arguments converted during
string formatting
I can make this work:
statement = "INSERT INTO `cycles` (`timeStamp`, `thing`) VALUES (123456.123456, 'Testing/IoT')"
Try this:
timeStamp = 123456789.123456
thing = "Testing/IoT"
# PyMySQL expects %s placeholders (unquoted, for every value type); the
# driver escapes and quotes each parameter itself. str.format-style {} is
# never substituted, so it cannot be used for query parameters.
statement = "INSERT INTO `cycles` (`timeStamp`, `thing`) VALUES (%s, %s)"
theData = (timeStamp, thing)
# logging applies "msg % args" itself, so give it a dedicated format string
# instead of passing the SQL text as the message with the tuple as its args.
logger.info("%s %r", statement, theData)
cursor = conn.cursor()
cursor.execute(statement, theData)
conn.commit()

Strange deform/colander behaviour "string indices must be integers"

So I have the following working code, that I have been using for the past few months
class UserSchema(colander.MappingSchema):
dob = colander.SchemaNode(
colander.Date(),
title='Date of birth:')
if 'submit' in request.POST:
controls = request.POST.items()
try:
appstruct = myform.validate(controls)
except ValidationFailure, e:
return {'form':e.render(), 'values': False}
Now this is a basic date picker, as shown here:
http://deform2demo.repoze.org/dateinput/
But all of a sudden, when using this widget I get the error:
Traceback (most recent call last):
File "/home/luke/virts/pyramid-1.4.5/lib/python2.7/site-packages/pyramid_debugtoolbar-1.0.9-py2.7.egg/pyramid_debugtoolbar/panels/performance.py", line 55, in resource_timer_handler
result = handler(request)
File "/home/luke/virts/pyramid-1.4.5/lib/python2.7/site-packages/pyramid/tweens.py", line 21, in excview_tween
response = handler(request)
File "/home/luke/virts/pyramid-1.4.5/lib/python2.7/site-packages/pyramid_tm-0.7-py2.7.egg/pyramid_tm/__init__.py", line 82, in tm_tween
reraise(*exc_info)
File "/home/luke/virts/pyramid-1.4.5/lib/python2.7/site-packages/pyramid_tm-0.7-py2.7.egg/pyramid_tm/__init__.py", line 63, in tm_tween
response = handler(request)
File "/home/luke/virts/pyramid-1.4.5/lib/python2.7/site-packages/pyramid/router.py", line 161, in handle_request
response = view_callable(context, request)
File "/home/luke/virts/pyramid-1.4.5/lib/python2.7/site-packages/pyramid/config/views.py", line 347, in rendered_view
result = view(context, request)
File "/home/luke/virts/pyramid-1.4.5/lib/python2.7/site-packages/pyramid/config/views.py", line 493, in _requestonly_view
response = view(request)
File "/home/luke/pyramids/getwork2day.co.uk/getwork2day/views/signup.py", line 244, in user_signup
appstruct = myform.validate(controls)
File "/home/luke/virts/pyramid-1.4.5/lib/python2.7/site-packages/deform-2.0a2-py2.7.egg/deform/field.py", line 636, in validate
return self.validate_pstruct(pstruct)
File "/home/luke/virts/pyramid-1.4.5/lib/python2.7/site-packages/deform-2.0a2-py2.7.egg/deform/field.py", line 661, in validate_pstruct
cstruct = self.deserialize(pstruct)
File "/home/luke/virts/pyramid-1.4.5/lib/python2.7/site-packages/deform-2.0a2-py2.7.egg/deform/field.py", line 512, in deserialize
return self.widget.deserialize(self, pstruct)
File "/home/luke/virts/pyramid-1.4.5/lib/python2.7/site-packages/deform-2.0a2-py2.7.egg/deform/widget.py", line 1274, in deserialize
result[name] = subfield.deserialize(subval)
File "/home/luke/virts/pyramid-1.4.5/lib/python2.7/site-packages/deform-2.0a2-py2.7.egg/deform/field.py", line 512, in deserialize
return self.widget.deserialize(self, pstruct)
File "/home/luke/virts/pyramid-1.4.5/lib/python2.7/site-packages/deform-2.0a2-py2.7.egg/deform/widget.py", line 551, in deserialize
date = pstruct['date'].strip()
TypeError: string indices must be integers
At a minimum, log the current form controls and post the output here. Also try to be more verbose and clear with your code examples, and do not mix up colander schema code and pyramid view code.
if 'submit' in request.POST:
controls = request.POST.items()
try:
appstruct = myform.validate(controls)
except ValidationFailure, e:
log.debug('form validation fails for %r' % controls)
return {'form':e.render(), 'values': False}

Resources