Exporting from MS Excel to MS Access with intermediate processing

I have an application which produces reports in Excel (.XLS) format. I need to append the data from these reports to an existing table in an MS Access 2010 database. A typical record is:
INC000000004154 Closed Cbeebies BBC Childrens HQ6 monitor wall dropping out. HQ6 P3 3/7/2013 7:03:01 PM 3/7/2013 7:03:01 PM 3/7/2013 7:14:15 PM The root cause of the problem was the power supply to the PC which was feeding the monitor. HQ6 Monitor wall dropping out. BBC Third Party Contractor supply this equipment.
The complication is that I need to do some limited processing on the data. Specifically, I need to do a couple of lookups converting names to numbers, and I need to parse a date string (for some reason the report puts the dates into the spreadsheet as text rather than in date format).
Now I could do this in Python using XLRD/XLWT, but would much prefer to do it in Excel or Access. Does anyone have any advice on a good way to approach this? I would very much prefer NOT to use VBA, so could I do something like record an MS Excel macro and then execute that macro on the newly created XLS file?
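For what it's worth, the text-to-date conversion itself is tiny in any of these tools. A minimal Python sketch, assuming the month-first format shown in the sample record (adjust the format string if the report is actually day-first):
from datetime import datetime

raw = "3/7/2013 7:03:01 PM"  # sample value from the record above
parsed = datetime.strptime(raw, "%m/%d/%Y %I:%M:%S %p")
print(parsed)  # 2013-03-07 19:03:01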

You can import some Excel data directly into MS Access, but since your requirement is to do some processing first, I don't see how you will be able to achieve that without one of the following:
an ETL application, like Pentaho or Talend or others.
That will certainly be like using a hammer to crush an ant though.
some other external data processing pipeline, in Python or some other programming language.
VBA (whether through macros or hand-coded).
VBA has been really good at doing that sort of thing in Access for literally decades.
Since you are using Excel and Access, staying within that realm looks like the best solution for solving your issue.
Just use queries:
You import the data without transformation to a table whose sole purpose is to accommodate the data from Excel; then you create queries from that raw data to add the missing information and massage the data before appending the result into your final destination table.
That solution has the advantage of letting you create simple steps in Access that you can easily record using macros.
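As an illustration of that staging-table idea, the append step can be a single Access append query run against the raw import. The table and column names below are hypothetical; the same SQL can be saved as a query inside Access and triggered from a macro, or run from outside via pyodbc as sketched here:
import pyodbc

# Hypothetical names: RawImport is the staging table filled by the Excel import,
# Incidents is the destination table, and Facilities supplies the name-to-number
# lookup. CDate() converts the text date during the append.
conn = pyodbc.connect(r"Driver={Microsoft Access Driver (*.mdb, *.accdb)};DBQ=C:\path\to\db.accdb;")
cur = conn.cursor()
cur.execute("""
    INSERT INTO Incidents (Reference, FacilityID, OpenedOn)
    SELECT r.Reference, f.ID, CDate(r.OpenedText)
    FROM RawImport AS r
    INNER JOIN Facilities AS f ON f.FacilityName = r.FacilityName
""")
conn.commit()
conn.close()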

I asked this question some time ago and decided it would be easier to do it in Python. Gord asked me to share, and here it is (sorry about the delay, other projects took priority for a while).
"""
Routine to migrate the S7 data from MySQL to the new Access
database.
We're using the pyodbc libraries to connect to Microsoft Access
Note that there are 32- and 64-bit versions of these libraries
available but in order to work the word-length for pyodbc and by
implication Python and all its associated compiled libraries must
match that of MS Access. Which is an arse as I've just had to
delete my 64-bit installation of Python and replace it and all
the libraries with the 32-bit version.
Tim Greening-Jackson 08 May 2013 (timATgreening-jackson.com)
"""
import pyodbc
import re
import datetime
import tkFileDialog
from Tkinter import *
class S7Incident:
"""
Class containing the records downloaded from the S7.INCIDENTS table
"""
def __init__(self, id_incident, priority, begin, acknowledge,
diagnose, workaround,fix, handoff, lro, nlro,
facility, ctas, summary, raised, code):
self.id_incident=unicode(id_incident)
self.priority = {u'P1':1, u'P2':2, u'P3':3, u'P4':4, u'P5':5} [unicode(priority.upper())]
self.begin = begin
self.acknowledge = acknowledge
self.diagnose = diagnose
self.workaround = workaround
self.fix = fix
self.handoff = True if handoff else False
self.lro = True if lro else False
self.nlro = True if nlro else False
self.facility = unicode(facility)
self.ctas = ctas
self.summary = "** NONE ***" if type(summary) is NoneType else summary.replace("'","")
self.raised = raised.replace("'","")
self.code = 0 if code is None else code
self.production = None
self.dbid = None
def __repr__(self):
return "[{}] ID:{} P{} Prod:{} Begin:{} A:{} D:+{}s W:+{}s F:+{}s\nH/O:{} LRO:{} NLRO:{} Facility={} CTAS={}\nSummary:'{}',Raised:'{}',Code:{}".format(
self.id_incident,self.dbid, self.priority, self.production, self.begin,
self.acknowledge, self.diagnose, self.workaround, self.fix,
self.handoff, self.lro, self.nlro, self.facility, self.ctas,
self.summary, self.raised, self.code)
def ProcessIncident(self, cursor, facilities, productions):
"""
Produces the SQL necessary to insert the incident in to the Access
database, executes it and then gets the autonumber ID (dbid) of the newly
created incident (this is used so LRO, NRLO CTAS and AD1 can refer to
their parent incident.
If the incident is classed as LRO, NLRO, CTAS then the appropriate
record is created. Returns the dbid.
"""
if self.raised.upper() in productions:
self.production = productions[self.raised.upper()]
else:
self.production = 0
sql="""INSERT INTO INCIDENTS (ID_INCIDENT, PRIORITY, FACILITY, BEGIN,
ACKNOWLEDGE, DIAGNOSE, WORKAROUND, FIX, HANDOFF, SUMMARY, RAISED, CODE, PRODUCTION)
VALUES ('{}', {}, {}, #{}#, {}, {}, {}, {}, {}, '{}', '{}', {}, {})
""".format(self.id_incident, self.priority, facilities[self.facility], self.begin,
self.acknowledge, self.diagnose, self.workaround, self.fix,
self.handoff, self.summary, self.raised, self.code, self.production)
cursor.execute(sql)
cursor.execute("SELECT ##IDENTITY")
self.dbid = cursor.fetchone()[0]
if self.lro:
self.ProcessLRO(cursor, facilities[self.facility])
if self.nlro:
self.ProcessNLRO(cursor, facilities[self.facility])
if self.ctas:
self.ProcessCTAS(cursor, facilities[self.facility], self.ctas)
return self.dbid
def ProcessLRO(self, cursor, facility):
sql = "INSERT INTO LRO (PID, DURATION, FACILITY) VALUES ({}, {}, {})"\
.format(self.dbid, self.workaround, facility)
cursor.execute(sql)
def ProcessNLRO(self, cursor, facility):
sql = "INSERT INTO NLRO (PID, DURATION, FACILITY) VALUES ({}, {}, {})"\
.format(self.dbid, self.workaround, facility)
cursor.execute(sql)
def ProcessCTAS(self, cursor, facility, code):
sql = "INSERT INTO CTAS (PID, DURATION, FACILITY, CODE) VALUES ({}, {}, {}, {})"\
.format(self.dbid, self.workaround, facility, self.ctas)
cursor.execute(sql)
class S7AD1:
"""
S7.AD1 records.
"""
def __init__(self, id_ad1, date, ref, commentary, adjustment):
self.id_ad1 = id_ad1
self.date = date
self.ref = unicode(ref)
self.commentary = unicode(commentary)
self.adjustment = float(adjustment)
self.pid = 0
self.production = 0
def __repr__(self):
return "[{}] Date:{} Parent:{} PID:{} Amount:{} Commentary: {} "\
.format(self.id_ad1, self.date.strftime("%d/%m/%y"), self.ref, self.pid, self.adjustment, self.commentary)
def SetPID(self, pid):
self.pid = pid
def SetProduction(self, p):
self.production = p
def Process(self, cursor):
sql = "INSERT INTO AD1 (pid, begin, commentary, production, adjustment) VALUES ({}, #{}#, '{}', {}, {})"\
.format(self.pid, self.date.strftime("%d/%m/%y"), self.commentary, self.production, self.adjustment)
cursor.execute(sql)
class S7Financial:
"""
S7 monthly financial summary of income and penalties from S7.FINANCIALS table.
These are identical in the new database
"""
def __init__(self, month, year, gco, cta, support, sc1, sc2, sc3, ad1):
self.begin = datetime.date(year, month, 1)
self.gco = float(gco)
self.cta = float(cta)
self.support = float(support)
self.sc1 = float(sc1)
self.sc2 = float(sc2)
self.sc3 = float(sc3)
self.ad1 = float(ad1)
def __repr__(self):
return "Period: {} GCO:{:.2f} CTA:{:.2f} SUP:{:.2f} SC1:{:.2f} SC2:{:.2f} SC3:{:.2f} AD1:{:.2f}"\
.format(self.start.strftime("%m/%y"), self.gco, self.cta, self.support, self.sc1, self.sc2, self.sc3, self.ad1)
def Process(self, cursor):
"""
Insert in to FINANCIALS table
"""
sql = "INSERT INTO FINANCIALS (BEGIN, GCO, CTA, SUPPORT, SC1, SC2, SC3, AD1) VALUES (#{}#, {}, {}, {}, {}, {}, {},{})"\
.format(self.begin, self.gco, self.cta, self.support, self.sc1, self.sc2, self.sc3, self.ad1)
cursor.execute(sql)
class S7SC3:
"""
Miscellaneous S7 SC3 stuff. The new table is identical to the old one.
"""
def __init__(self, begin, month, year, p1ot, p2ot, totchg, succchg, chgwithinc, fldchg, egychg):
self.begin = begin
self.p1ot = p1ot
self.p2ot = p2ot
self.changes = totchg
self.successful = succchg
self.incidents = chgwithinc
self.failed = fldchg
self.emergency = egychg
def __repr__(self):
return "{} P1:{} P2:{} CHG:{} SUC:{} INC:{} FLD:{} EGY:{}"\
.format(self.period.strftime("%m/%y"), self.p1ot, self.p1ot, self.changes, self.successful, self.incidents, self.failed, self.emergency)
def Process(self, cursor):
"""
Inserts a record in to the Access database
"""
sql = "INSERT INTO SC3 (BEGIN, P1OT, P2OT, CHANGES, SUCCESSFUL, INCIDENTS, FAILED, EMERGENCY) VALUES\
(#{}#, {}, {}, {}, {}, {}, {}, {})"\
.format(self.begin, self.p1ot, self.p2ot, self.changes, self.successful, self.incidents, self.failed, self.emergency)
cursor.execute(sql)
def ConnectToAccessFile():
"""
Prompts the user for an Access database file, connects, creates a cursor,
cleans out the tables which are to be replaced, gets a hash of the facilities
table keyed on facility name returning facility id
"""
# Prompts the user to select which Access DB file he wants to use and then attempts to connect
root = Tk()
dbname = tkFileDialog.askopenfilename(parent=root, title="Select output database", filetypes=[('Access 2010', '*.accdb')])
root.destroy()
# Connect to the Access (new) database and clean its existing incidents etc. tables out as
# these will be replaced with the new data
dbcxn = pyodbc.connect("Driver={Microsoft Access Driver (*.mdb, *.accdb)};DBQ="+dbname+";")
dbcursor=dbcxn.cursor()
print("Connected to {}".format(dbname))
for table in ["INCIDENTS", "AD1", "LRO", "NLRO", "CTAS", "SC3", "PRODUCTIONS", "FINANCIALS"]:
print("Clearing table {}...".format(table))
dbcursor.execute("DELETE * FROM {}".format(table))
# Get the list of facilities from the Access database...
dbcursor.execute("SELECT id, facility FROM facilities")
rows = dbcursor.fetchall()
dbfacilities = {unicode(row[1]):row[0] for row in rows}
return dbcxn, dbcursor, dbfacilities
# Entry point
incre = re.compile("INC\d{12}[A-Z]?") # Regex that matches incident references
try:
dbcxn, dbcursor, dbfacilities = ConnectToAccessFile()
# Connect to the MySQL S7 (old) database and read the incidents and ad1 tables
s7cxn = pyodbc.connect("DRIVER={MySQL ODBC 3.51 Driver}; SERVER=localhost;DATABASE=s7; UID=root; PASSWORD=********; OPTION=3")
print("Connected to MySQL S7 database")
s7cursor = s7cxn.cursor()
s7cursor.execute("""
SELECT id_incident, priority, begin, acknowledge,
diagnose, workaround, fix, handoff, lro, nlro,
facility, ctas, summary, raised, code FROM INCIDENTS""")
rows = s7cursor.fetchall()
# Discard any incidents which don't have a reference of the form INC... as they are ancient
print("Fetching incidents")
s7incidents = {unicode(row[0]):S7Incident(*row) for row in rows if incre.match(row[0])}
# Get the list of productions from the S7 database to replace the one we've just deleted ...
print("Fetching productions")
s7cursor.execute("SELECT DISTINCT RAISED FROM INCIDENTS")
rows = s7cursor.fetchall()
s7productions = [r[0] for r in rows]
# ... now get the AD1s ...
print("Fetching AD1s")
s7cursor.execute("SELECT id_ad1, date, ref, commentary, adjustment from AD1")
rows = s7cursor.fetchall()
s7ad1s = [S7AD1(*row) for row in rows]
# ... and the financial records ...
print("Fetching Financials")
s7cursor.execute("SELECT month, year, gco, cta, support, sc1, sc2, sc3, ad1 FROM Financials")
rows = s7cursor.fetchall()
s7financials = [S7Financial(*row) for row in rows]
print("Writing financials ({})".format(len(s7financials)))
[p.Process(dbcursor) for p in s7financials]
# ... and the SC3s.
print("Fetching SC3s")
s7cursor.execute("SELECT begin, month, year, p1ot, p2ot, totchg, succhg, chgwithinc, fldchg, egcychg from SC3")
rows = s7cursor.fetchall()
s7sc3s = [S7SC3(*row) for row in rows]
print("Writing SC3s ({})".format(len(s7sc3s)))
[p.Process(dbcursor) for p in s7sc3s]
# Re-create the productions table in the new database. Note we refer to production
# by number in the incidents table so need to do the SELECT ##IDENTITY to give us the
# autonumber index. To make sure everything is case-insensitive convert the
# hash keys to UPPERCASE.
dbproductions = {}
print("Writing productions ({})".format(len(s7productions)))
for p in sorted(s7productions):
dbcursor.execute("INSERT INTO PRODUCTIONS (PRODUCTION) VALUES ('{}')".format(p))
dbcursor.execute("SELECT ##IDENTITY")
dbproductions[p.upper()] = dbcursor.fetchone()[0]
# Now process the incidents etc. that we have retrieved from the S7 database
print("Writing incidents ({})".format(len(s7incidents)))
[s7incidents[k].ProcessIncident(dbcursor, dbfacilities, dbproductions) for k in sorted(s7incidents)]
# Match the new parent incident IDs in the AD1s and then write to the new table. Some
# really old AD1s don't have the parent incident reference in the REF field, it is just
# mentioned SOMEWHERE in the commentary. So if the REF field doesn't match then do a
# re.search (not re.match!) for it. It isn't essential to match these older AD1s with
# their parent incident, but it is quite useful (and tidy).
print("Matching and writing AD1s".format(len(s7ad1s)))
for a in s7ad1s:
if a.ref in s7incidents:
a.SetPID(s7incidents[a.ref].dbid)
a.SetProduction(s7incidents[a.ref].production)
else:
z=incre.search(a.commentary)
if z and z.group() in s7incidents:
a.SetPID(s7incidents[z.group()].dbid)
a.SetProduction(s7incidents[z.group()].production)
a.Process(dbcursor)
print("Comitting changes")
dbcursor.commit()
finally:
print("Closing databases")
dbcxn.close()
s7cxn.close()

It turns out that the file has additional complications in terms of mangled data, requiring a degree of processing that is a pain to do in Excel but trivially simple in Python. So I will reuse some Python 2.x scripts which use the XLRD/XLWT libraries to munge the spreadsheet.
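For the record, a minimal sketch of that kind of munging with xlrd; the column positions and the lookup table here are hypothetical, not taken from the real report:
import xlrd
from datetime import datetime

facility_ids = {"HQ6": 6}  # name-to-number lookup; values are made up

book = xlrd.open_workbook("report.xls")
sheet = book.sheet_by_index(0)
for row in range(1, sheet.nrows):  # skip the header row
    reference = sheet.cell_value(row, 0)                      # e.g. INC000000004154
    facility = facility_ids.get(sheet.cell_value(row, 2), 0)  # name -> number
    opened = datetime.strptime(sheet.cell_value(row, 6), "%m/%d/%Y %I:%M:%S %p")
    # ...write (reference, facility, opened) to the Access table, e.g. via pyodbc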

Related

Random key lookup on LMDB/python vs BerkeleyDB/python (How to make LMDB lookup faster)

I have this program written in Python that uses BerkeleyDB to store data (event logs), which I migrated to LMDB. My problem is: before an event gets written, the program does a lookup to check whether the event already exists. I noticed that the BerkeleyDB version is much faster at doing the single-value lookup over 13k+ records (the LMDB version seems to be about 1 second slower for every lookup), even with transactions enabled in BerkeleyDB. Any idea how to speed up the LMDB version? Note that I already have 70GB+ (about 30 million records) of data stored in my BerkeleyDB, and doing additional processing on those events takes me more than an hour, so I thought switching to LMDB would decrease the processing time.
My LMDB environment was opened this way (I even set readahead to False, but the database size is just about 35MB so I don't think it matters):
env = lmdb.open(db_folder, map_size=100000000000, max_dbs=4, readahead=False)
database = env.open_db('events'.encode())
My berkeleydb was opened this way:
env = db.DBEnv()
env.open(db_folder, db.DB_INIT_MPOOL | db.DB_CREATE | db.DB_INIT_LOG | db.DB_INIT_TXN | db.DB_RECOVER, 0)
database = db.DB(env)
BerkeleyDB version of check:
if event['eId'].encode('utf-8') in database:
    duplicate_count += 1
else:
    try:
        txn = env.txn_begin(None)
        database[event['eId'].encode('utf-8')] = json.dumps(event).encode('utf-8')
    except:
        if txn is not None:
            txn.abort()
            txn = None
        raise
    else:
        txn.commit()
        txn = None
        event_count += 1
lmdb version:
with env.begin(buffers=True, db=database) as txn:
    if (txn.get(event['eId'].encode()) is not None):
        dup_event_count += 1
    else:
        txn.put(event['eId'].encode(), json.dumps(event).encode('utf-8'))
        event_count += 1
Solution:
Place with env.begin outside the loop:
@case('rand lookup')
def test():
    with env.begin() as txn:
        for word in words:
            txn.get(word)
    return len(words)

@case('per txn rand lookup')
def test():
    for word in words:
        with env.begin() as txn:
            txn.get(word)
    return len(words)
Figured this out myself. What I'm doing is a per transaction random lookup. I just had to place with env.begin outside of the for loop (not visible in my example) as suggested in this example: https://raw.githubusercontent.com/jnwatson/py-lmdb/master/examples/dirtybench.py
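Applied to the duplicate check above, that means opening one write transaction around the whole batch instead of one per event. A sketch, assuming events is the iterable that feeds the original loop:
# One write transaction for the whole batch rather than one per event
with env.begin(write=True, buffers=True, db=database) as txn:
    for event in events:  # 'events' stands in for whatever drives the original loop
        key = event['eId'].encode('utf-8')
        if txn.get(key) is not None:
            dup_event_count += 1
        else:
            txn.put(key, json.dumps(event).encode('utf-8'))
            event_count += 1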

Is it possible to use SQLite in EFS reliably?

Is it possible to use SQLite in AWS EFS safely? In my reading trying to determine whether this is viable, there appear to be some allusions that it should be doable, since AWS EFS implemented NFSv4 back in 2017. In practice I am having no luck getting consistent behavior out of it.
Quick points:
"Just use AWS RDS": Due to issues with other AWS architecture another team has implemented, we are trying to work around resource starvation caused by the API (DynamoDB isn't an option).
"This goes against SQLite's primary use case (being a locally accessed DB)": Yes, but given the circumstances it seems like the best approach.
I have verified that we are running NFSv4 on our EC2 instance.
Current results are very inconsistent, with 3 exceptions encountered irrespective of the approach I use:
"file is encrypted or is not a database"
"disk I/O error (potentially related to EFS open file limits)"
"database disk image is malformed" (The database actually isn't corrupted after this)
database code:
SQLITE_VAR_LIMIT = 999
dgm_db_file_name = ''
db = SqliteExtDatabase(None)
lock_file = f'{os.getenv("efs_path", "tmp")}/db_lock_file.lock'

def lock_db_file():
    with open(lock_file, 'w+') as lock:
        limit = 900
        while limit:
            try:
                fcntl.flock(lock, fcntl.LOCK_EX | fcntl.LOCK_NB)
                print(f'db locked')
                break
            except Exception as e:
                print(f'Exception: {str(e)}')
                limit -= 1
                time.sleep(1)
        if not limit:
            raise ValueError(f'Timed out after 900 seconds while waiting for database lock.')

def unlock_db_file():
    with open(lock_file, 'w+') as lock:
        fcntl.flock(lock, fcntl.LOCK_UN)
        print(f'db unlocked')

def initialize_db(db_file_path=dgm_db_file_name):
    print(f'Initializing db ')
    global db
    db.init(db_file_path, pragmas={
        'journal_mode': 'wal',
        'cache_size': -1 * 64000,  # 64MB
        'foreign_keys': 1})
    print(f'db initialized')

class Thing(Model):
    name = CharField(primary_key=True)
    etag = CharField()
    last_modified = CharField()

    class Meta:
        database = db

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    @staticmethod
    def insert_many(stuff):
        data = [(k, v['ETag'], v['Last-Modified']) for k, v in stuff.items()]
        fields = [Thing.name, Thing.etag, Thing.last_modified]
        limit = 900
        while True:
            try:
                with db.atomic():
                    for key_batch in chunked(data, SQLITE_VAR_LIMIT // len(fields)):
                        s = Thing.insert_many(key_batch, fields=[Thing.name, Thing.etag, Thing.last_modified]) \
                            .on_conflict_replace().execute()
                break
            except Exception as e:
                print(f'Exception: {str(e)}')
                print(f'Will try for {limit} more seconds.')
                limit -= 1
                time.sleep(1)
            if not limit:
                raise ValueError('Failed to execute query after 900 seconds.')
Example Call:
print(f'Critical section start')
# lock_db_file()  # I have tried with a secondary lock file as well
self.stuff_db = Thing()
if not Path(self.db_file_path).exists():
    initialize_db(self.db_file_path)
    print('creating tables')
    db.create_tables([Thing], safe=True)
else:
    initialize_db(self.db_file_path)
getattr(Thing, insert_many)(self.stuff_db, stuff_db)
# db.close()
# unlock_db_file()
print(f'Critical section end')
print(f'len after update: {len(stuff)}')
Additional peculiarities:
If a lambda gets stuck catching the "malformed image" exception and a new lambda execution is triggered, the error resolves itself in the other lambda.
After some trial and error I discovered that this is workable. It appears that the design needs to use APSWDatabase(..., vfs='unix-excl') to properly enforce locking.
Database code:
from peewee import *
from playhouse.apsw_ext import APSWDatabase

SQLITE_VAR_LIMIT = 999
db = APSWDatabase(None, vfs='unix-excl')

def initialize_db(db_file_path):
    global db
    db.init(db_file_path, pragmas={
        'journal_mode': 'wal',
        'cache_size': -1 * 64000})
    db.create_tables([Thing], safe=True)
    return Thing()

class Thing(Model):
    field_1 = CharField(primary_key=True)
    field_2 = CharField()
    field_3 = CharField()

    class Meta:
        database = db
This allows for the following usage:
db_model = initialize_db(db_file_path)
with db:
    # Do database queries here with the db_model
    pass
Note: If you don't use the context-managed database connection you will need to explicitly call db.close(), otherwise the lock will not be released from the file. Additionally, calling db.init(...) places a lock on the database until it is closed.
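In other words, without the with block the pattern looks roughly like this (a sketch only, reusing the names above):
db_model = initialize_db(db_file_path)
try:
    pass  # run queries through db_model here
finally:
    db.close()  # releases the lock taken when db.init() opened the file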

When working with the Stripe API, is it better to sort each request or store locally and perform queries?

This is my first post, I've been lurking for a while.
Some context to my question:
I'm working with the Stripe API to pull transaction data and match it with booking numbers from another API source (property reservations --> funds received, for reconciliation).
I started by just making calls to the API and sorting the data in place using Python 3, but it started to get very complicated, so I thought I should persist the data in a MongoDB instance on localhost. I began to do this, but decided that storing the sorted data was still just as complicated and the request times were getting quite long, so I thought maybe I should pull all the Stripe data, store it locally and then query whatever I needed.
So here I am, with a bunch of code I've written for both and still not a lot of progress. I'm a bit lost about the next move. I feel like I should probably pick a path and stick with it. I'm a little unsure what the "best practice" is when working with APIs; usually I would turn to YouTube, but I haven't been able to find a video which covers this specific scenario. The amount of data being pulled from the API would be around 100kb per request.
Here is the original code which would grab each query. Recently I've learnt I can use the expand feature (I think this is what it's called) so I don't need to dig down so many levels in my for loop.
The goal was to get just the metadata, which contains the booking reference numbers that can then be matched against a response from my property management system's API. My code is a bit embarrassing; I've kinda just learnt it over the last little while in my downtime from work.
import csv
import datetime
import os
import pymongo
import stripe

"""
We need to find a Valid reservation_ref or reservation_id in the booking.com Metadata. Then we need to match this to a property ID from our list of properties in the book file.
"""

myclient = pymongo.MongoClient("mongodb://localhost:27017/")
mydb = myclient["mydatabase"]
stripe_payouts = mydb["stripe_payouts"]
stripe.api_key = "sk_live_thisismyprivatekey"

r = stripe.Payout.list(limit=4)
payouts = []

for data in r['data']:
    if data['status'] == 'paid':
        p_id = data['id']
        amount = data['amount']
        meta = []
        txn = stripe.BalanceTransaction.list(payout=p_id)
        amount_str = str(amount)
        amount_dollar = str(amount / 100)
        txn_len = len(txn['data'])
        for x in range(txn_len):
            if x != 0:
                charge = (txn['data'][x]['source'])
                if charge.startswith("ch_"):
                    meta_req = stripe.Charge.retrieve(charge)
                    meta = list(meta_req['metadata'])
                elif charge.startswith("re_"):
                    meta_req = stripe.Refund.retrieve(charge)
                    meta = list(meta_req['metadata'])
        if stripe_payouts.find({"_id": p_id}).count() == 0:
            payouts.append(
                {
                    "_id": str(p_id),
                    "payout": str(p_id),
                    "transactions": txn['data'],
                    "metadata": {
                        charge: [meta]
                    }
                }
            )

# TODO: Add error exception to check for po id already in the database.
if len(payouts) != 0:
    x = stripe_payouts.insert_many(payouts)
    print("Inserted into Database ", len(x.inserted_ids), x.inserted_ids)
else:
    print("No entries made")
"_id": str(p_id),
"payout": str(p_id),
"transactions": txn['data'],
"metadata": {
charge: [meta]
This last section doesn't work properly; this is kinda where I stopped and started calling all the data and storing it in MongoDB locally.
I appreciate it if you've read this wall of text this far.
Thanks
EDIT:
I'm unsure what the best practice is for adding additional information, but I've messed with the code below per the answer given. I'm now getting a "Key error" when trying to insert the entries into the database. I feel like it's duplicating keys somehow.
payouts = []

def add_metadata(payout_id, transaction_type):
    transactions = stripe.BalanceTransaction.list(payout=payout_id, type=transaction_type, expand=['data.source'])
    for transaction in transactions.auto_paging_iter():
        meta = [transaction.source.metadata]
        if stripe_payouts.Collection.count_documents({"_id": payout_id}) == 0:
            payouts.append(
                {
                    transaction.id: transaction
                }
            )

for data in r['data']:
    p_id = data['id']
    add_metadata(p_id, 'charge')
    add_metadata(p_id, 'refund')

# TODO: Add error exception to check for po id already in the database.
if len(payouts) != 0:
    x = stripe_payouts.insert_many(payouts)
    # print(payouts)
    print("Inserted into Database ", len(x.inserted_ids), x.inserted_ids)
else:
    print("No entries made")
To answer your high-level question: if you're frequently accessing the same data and that data isn't changing much, then it can make sense to keep a local copy of the data in sync and run your frequent queries against your local data.
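If you do go the local-copy route, one common pattern is to upsert each payout by its Stripe ID so that re-running the sync refreshes records instead of duplicating them. A rough sketch with pymongo (not part of your code):
# Hypothetical sync step: upsert each paid payout keyed by its Stripe ID
for payout in stripe.Payout.list(limit=100, status='paid').auto_paging_iter():
    stripe_payouts.update_one(
        {"_id": payout.id},
        {"$set": {"payout": payout.to_dict()}},
        upsert=True,
    )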
No need to be embarrassed by your code :) we've all been new at something at some point.
Looking at your code I noticed a few things:
Rather than fetch all payouts, then use an if statement to skip all except paid, instead you can pass another filter to only query those paid payouts.
r = stripe.Payout.list(limit=4, status='paid')
You mentioned the expand [B] feature of the API but didn't use it, so I wanted to share how you can do that here with an example. In this case, you're making 1 API call to get the list of payouts, then 1 API call per payout to get the transactions, then 1 API call per charge or refund to get its metadata. That's 1 + (n payouts) + (n payouts × m charges or refunds) calls, which is a pretty big number. To cut this down, let's pass expand=['data.source'] when fetching transactions, which will include all of the metadata about the charge or refund along with the transaction.
transactions = stripe.BalanceTransaction.list(payout=p_id, expand=['data.source'])
Fetching the BalanceTransaction list like this will only work as long as your results fit on one "page" of results. The API returns paginated [A] results, so if you have more than 10 transactions per payout, this will miss some. Instead, you can use an auto-pagination feature of the stripe-python library to iterate over all results from the BalanceTransaction list.
for transaction in transactions.auto_paging_iter():
I'm not quite sure why we're skipping over index 0 with if x != 0: so that may need to be addressed elsewhere :D
I didn't see how or where amount_str or amount_dollar was actually used.
Rather than determining the type of the object by checking the ID prefix like ch_ or re_ you'll want to use the type attribute. Again in this case, it's better to filter by type so that you only get exactly the data you need from the API:
transactions = stripe.BalanceTransaction.list(payout=p_id, type='charge', expand=['data.source'])
I'm unable to test because I lack the same database that you have, but wanted to share a refactoring of your code that you may consider.
r = stripe.Payout.list(limit=4, status='paid')
payouts = []

for data in r['data']:
    p_id = data['id']
    amount = data['amount']
    meta = []
    amount_str = str(amount)
    amount_dollar = str(amount / 100)

    transactions = stripe.BalanceTransaction.list(payout=p_id, type='charge', expand=['data.source'])
    for transaction in transactions.auto_paging_iter():
        meta = list(transaction.source.metadata)
        if stripe_payouts.find({"_id": p_id}).count() == 0:
            payouts.append(
                {
                    "_id": str(p_id),
                    "payout": str(p_id),
                    "transactions": transactions,
                    "metadata": {
                        charge: [meta]
                    }
                }
            )

    transactions = stripe.BalanceTransaction.list(payout=p_id, type='refund', expand=['data.source'])
    for transaction in transactions.auto_paging_iter():
        meta = list(transaction.source.metadata)
        if stripe_payouts.find({"_id": p_id}).count() == 0:
            payouts.append(
                {
                    "_id": str(p_id),
                    "payout": str(p_id),
                    "transactions": transactions,
                    "metadata": {
                        charge: [meta]
                    }
                }
            )

# TODO: Add error exception to check for po id already in the database.
if len(payouts) != 0:
    x = stripe_payouts.insert_many(payouts)
    print("Inserted into Database ", len(x.inserted_ids), x.inserted_ids)
else:
    print("No entries made")
Here's a further refactoring using functions defined to encapsulate just the bit adding to the database:
r = stripe.Payout.list(limit=4, status='paid')
payouts = []

def add_metadata(payout_id, transaction_type):
    transactions = stripe.BalanceTransaction.list(payout=payout_id, type=transaction_type, expand=['data.source'])
    for transaction in transactions.auto_paging_iter():
        meta = list(transaction.source.metadata)
        if stripe_payouts.find({"_id": payout_id}).count() == 0:
            payouts.append(
                {
                    "_id": str(payout_id),
                    "payout": str(payout_id),
                    "transactions": transactions,
                    "metadata": {
                        charge: [meta]
                    }
                }
            )

for data in r['data']:
    p_id = data['id']
    add_metadata(p_id, 'charge')
    add_metadata(p_id, 'refund')

# TODO: Add error exception to check for po id already in the database.
if len(payouts) != 0:
    x = stripe_payouts.insert_many(payouts)
    print("Inserted into Database ", len(x.inserted_ids), x.inserted_ids)
else:
    print("No entries made")
[A] https://stripe.com/docs/api/pagination
[B] https://stripe.com/docs/api/expanding_objects

How to call a function at a time interval in Odoo 11

I know we can create automated actions using cron in Odoo, but I want something a bit different.
In Odoo's mass mailing I want to add a repetition option to mail mass mailings. For example, in the form view_mail_mass_mailing_form > Options page I added a repetition selection field, because I want each mass mailing to repeat on its own schedule.
class MailMassMailing(models.Model):
    _inherit = 'mail.mass_mailing'

    recurrence_mail = fields.Selection([
        ('daily', 'Day'),
        ('weekly', 'Weeks'),
        ('monthly', 'Months'),
    ], string='Recurring')
I want this mass mailing to be sent every day, week or month. How do I call a function at a date interval, i.e. every so many days, weeks or months? The sending of this mass mailing should repeat, counted from its creation date.
Just extend the mass mailing model with a new date field and implement a model method for a daily-running ir.cron to call (a sketch of the cron record itself follows the code below).
from odoo import api, fields, models

class MailMassMailing(models.Model):
    _inherit = 'mail.mass_mailing'

    recurrence_mail = fields.Selection([
        ('daily', 'Day'),
        ('weekly', 'Weeks'),
        ('monthly', 'Months'),
    ], string='Recurring')
    last_sent_on = fields.Date()

    @api.model
    def run_send_recurring(self):
        """ Resend mass mailings with a recurring interval """
        domain = [('recurrence_mail', '!=', False)]
        # TODO: monthly should be solved in another way, but that
        # is not needed for this example
        deltas = {'daily': 1, 'weekly': 7, 'monthly': 30}
        today = fields.Date.from_string(fields.Date.today())  # date object for the subtraction below
        for mass_mail in self.search(domain):
            # never sent? go send it
            if not mass_mail.last_sent_on:
                pass  # send the way you want
            else:
                # or get the delta between today and last_sent_on
                last_dt = fields.Date.from_string(mass_mail.last_sent_on)
                if (today - last_dt).days >= deltas[mass_mail.recurrence_mail]:
                    pass  # send the way you want
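The method above still needs a scheduled action to call it once a day. A minimal sketch of creating that ir.cron record from code (it could equally be declared in XML data; the field values are the usual Odoo 11 ones and the record name is made up):
# Sketch: register a daily cron that calls run_send_recurring() (Odoo 11 fields)
env['ir.cron'].create({
    'name': 'Send recurring mass mailings',
    'model_id': env['ir.model']._get('mail.mass_mailing').id,
    'state': 'code',
    'code': 'model.run_send_recurring()',
    'interval_number': 1,
    'interval_type': 'days',
    'numbercall': -1,  # repeat indefinitely
})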
Thank you @CZoellner for your help. I found the solution with your idea:
# Solution ############### .py
@api.model
def run_send_recurring(self):
    """ Resend mass mailing with recurring interval """
    date_format = '%Y-%m-%d'
    domain = [('recurrence_mail', '!=', False), ('state', '=', 'done')]
    deltas = {'daily': 1, 'weekly': 7, 'monthly': 30}
    logger.info("______deltas________: %s ", deltas)
    today = fields.Date.today()
    logger.info("______today________: %s ", today)
    for mass_mail in self.search(domain):
        logger.info("______mass_mail________: %s ", mass_mail)
        # never sent? go send it
        if not mass_mail.last_sent_on:
            self.put_in_queue()
        joining_date = mass_mail.last_sent_on
        current_date = (datetime.today()).strftime(date_format)
        print('joining_date', joining_date)
        d1 = datetime.strptime(joining_date, date_format).date()
        logger.info("______1 day________: %s ", d1)
        d2 = datetime.strptime(current_date, date_format).date()
        logger.info("______2 day________: %s ", d2)
        logger.info("______deltas[mass_mail.recurrence_mail]________: %s ", deltas[mass_mail.recurrence_mail])
        r = relativedelta(d1, d2)
        logger.info("______r day________: %s ", r.days)
        # compare the number of elapsed days against the configured interval
        if (d2 - d1).days >= deltas[mass_mail.recurrence_mail]:
            mass_mail.put_in_queue()

QTreeView crashing for no apparent reason

I introduced a treeview in the GUI of the program I'm making, and since then it crashes when I attempt to change its model once it has been set.
The course of action is:
load the file using a file dialogue
clear the models on the interface objects (tables and treeview); the first time the treeview is not affected since there is no model in it yet
populate the treeview model
other stuff not related to the issue
The problematic functions are:
The file loading procedure:
def open_file(self):
    """
    Open a file
    :return:
    """
    print("actionOpen_file_click")
    # declare the dialog
    # file_dialog = QtGui.QFileDialog(self)
    # declare the allowed file types
    files_types = "Excel 97 (*.xls);;Excel (*.xlsx);;DigSILENT (*.dgs);;MATPOWER (*.m)"
    # call dialog to select the file
    filename, type_selected = QtGui.QFileDialog.getOpenFileNameAndFilter(self, 'Open file',
                                                                         self.project_directory, files_types)
    if len(filename) > 0:
        self.project_directory = os.path.dirname(filename)
        print(filename)
        self.circuit = Circuit(filename, True)
        # set data structures list model
        self.ui.dataStructuresListView.setModel(self.available_data_structures_listModel)
        # set the first index
        index = self.available_data_structures_listModel.index(0, 0, QtCore.QModelIndex())
        self.ui.dataStructuresListView.setCurrentIndex(index)
        # clean
        self.clean_GUI()
        # load table
        self.display_objects_table()
        # draw graph
        self.ui.gridPlot.setTitle(os.path.basename(filename))
        self.re_plot()
        # show times
        if self.circuit.time_series is not None:
            if self.circuit.time_series.is_ready():
                self.set_time_comboboxes()
        # tree view at the results
        self.set_results_treeview_structure()
        # populate editors
        self.populate_editors_defaults()
The treeview model assignment:
def set_results_treeview_structure(self):
    """
    Sets the results treeview data structure
    :return:
    """
    # self.ui.results_treeView.setSelectionBehavior(QtGui.QAbstractItemView.SelectRows)
    model = QtGui.QStandardItemModel()
    # model.setHorizontalHeaderLabels(['Elements'])
    self.ui.results_treeView.setModel(model)
    # self.ui.results_treeView.setUniformRowHeights(True)

    def pass_to_QStandardItem_list(list_):
        res = list()
        for elm in list_:
            elm1 = QtGui.QStandardItem(elm)
            elm1.setEditable(False)
            res.append(elm1)
        return res

    bus_results = pass_to_QStandardItem_list(['Voltages (p.u.)', 'Voltages (kV)'])
    per_bus_results = pass_to_QStandardItem_list(['Voltage (p.u.) series', 'Voltage (kV) series',
                                                  'Active power (MW)', 'Reactive power (MVar)',
                                                  'Active and reactive power (MW, MVar)', 'Aparent power (MVA)',
                                                  'S-V curve', 'Q-V curve'])
    branches_results = pass_to_QStandardItem_list(['Loading (%)', 'Current (p.u.)',
                                                   'Current (kA)', 'Losses (MVA)'])
    per_branch_results = pass_to_QStandardItem_list(['Loading (%) series', 'Current (p.u.) series',
                                                     'Current (kA) series', 'Losses (MVA) series'])
    generator_results = pass_to_QStandardItem_list(['Reactive power (p.u.)', 'Reactive power (MVar)'])
    per_generator_results = pass_to_QStandardItem_list(['Reactive power (p.u.) series',
                                                        'Reactive power (MVar) series'])

    self.family_results_per_family = dict()

    # nodes
    buses = QtGui.QStandardItem('Buses')
    buses.setEditable(False)
    buses.appendRows(bus_results)
    self.family_results_per_family[0] = len(bus_results)
    names = self.circuit.bus_names
    for name in names:
        bus = QtGui.QStandardItem(name)
        bus.appendRows(per_bus_results)
        bus.setEditable(False)
        buses.appendRow(bus)

    # branches
    branches = QtGui.QStandardItem('Branches')
    branches.setEditable(False)
    branches.appendRows(branches_results)
    self.family_results_per_family[1] = len(branches_results)
    names = self.circuit.branch_names
    for name in names:
        branch = QtGui.QStandardItem(name)
        branch.appendRows(per_branch_results)
        branch.setEditable(False)
        branches.appendRow(branch)

    # generators
    generators = QtGui.QStandardItem('Generators')
    generators.setEditable(False)
    generators.appendRows(generator_results)
    self.family_results_per_family[2] = len(generator_results)
    names = self.circuit.gen_names
    for name in names:
        gen = QtGui.QStandardItem(name)
        gen.appendRows(per_generator_results)
        gen.setEditable(False)
        generators.appendRow(gen)

    model.appendRow(buses)
    model.appendRow(branches)
    model.appendRow(generators)
And the GUI "cleaning":
def clean_GUI(self):
    """
    Initializes the comboboxes and tables
    Returns:
    """
    self.ui.tableView.setModel(None)
    if self.ui.results_treeView.model() is not None:
        self.ui.results_treeView.model().clear()
    self.ui.profile_time_selection_comboBox.clear()
    self.ui.results_time_selection_comboBox.clear()
    self.ui.gridPlot.clear()
The complete code can be seen here
I have seen that this behavior is usually triggered by calls outside the GUI thread, but I don't think this is the case here.
I'd appreciate it if someone could point out the problem. Again, the complete code for testing is here.
The solution to this in my case has been the following:
The QStandardItemModel() variable called model in the code was turned into a class-wide variable, self.tree_model.
When I want to replace the treeview object's model, I delete the existing one with del self.tree_model.
Then I re-create it with self.tree_model = QStandardItemModel().
This way the TreeView object's model is effectively replaced without crashing.
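A minimal sketch of that pattern, with method and widget names following the code above (illustrative, not the exact project code):
def rebuild_results_tree(self):
    # Detach and delete the previous model before attaching a new one
    if getattr(self, 'tree_model', None) is not None:
        self.ui.results_treeView.setModel(None)
        del self.tree_model
    self.tree_model = QtGui.QStandardItemModel()
    self.ui.results_treeView.setModel(self.tree_model)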
