Read records from CSV file and print report - python-3.x

I have been working on a program for a week now, but have been unable to get it to work according to the guidelines.
In this program (payroll.py), I have to open the CSV data file (employees.csv), read the records in the file, and produce a payroll report using the functions in payroll.py. The output should be printed, not written to a separate output file, and should end up looking like this:
LastName FirstName Hours RegHours OTHours RegPay OTPay GrossPay Deductions NetPay
Hightower Michael 42.0 40.0 2.0 400.00 30.00 430.00 107.07 322.93
Jackson Samuel 53.0 40.0 13.0 506.00 246.68 752.67 187.42 565.25
Jones Catherine 35.0 35.0 0.00 680.05 0.00 680.05 169.33 510.72
The payroll program works just fine on its own (without calling the CSV file), but when I try to call the file (using "from csv import reader"), one of two things happens:
1) I can call the first three columns (last name, first name, and hours), but I am unable to "insert" the additional columns (I get an index error because, of course, those columns don't exist in the original CSV file), or
2) The program only pulls up one entire record, which happens to be the last record in the CSV file.
Any guidance on how to accomplish this would be greatly appreciated. Thank you.
Here is the code for payroll.py:
def main() :
    """Interactively collect one employee's data and print the payroll summary."""
    employeeFirstName, employeeLastName = employeeFullName()
    employeePayRate, employeeHoursWorked = employeePay()
    # calculateRegularHours already returns BOTH the regular and overtime
    # split, so the old second call to calculateOvertimeHours was redundant.
    employeeRegularHours, employeeOvertimeHours = calculateRegularHours(employeeHoursWorked)
    employeeTotalHours = calculateTotalHours(employeeRegularHours, employeeOvertimeHours)
    regularPayAmount = calculateRegularPay(employeePayRate, employeeRegularHours)
    overtimePayAmount = calculateOvertimePay(employeePayRate, employeeOvertimeHours)
    grossPayAmount = calculateGrossPay(regularPayAmount, overtimePayAmount)
    federalTaxWithheld = calculateFederalTax(grossPayAmount)
    stateTaxWithheld = calculateStateTax(grossPayAmount)
    medicareTaxWithheld = calculateMedicareTax(grossPayAmount)
    socSecTaxWithheld = calculateSocSecTax(grossPayAmount)
    totalTaxesWithheld = calculateTotalTaxes(federalTaxWithheld, stateTaxWithheld, medicareTaxWithheld, socSecTaxWithheld)
    netPayAmount = calculateNetPay(grossPayAmount, totalTaxesWithheld)
    payrollSummaryReport(employeeFirstName, employeeLastName, employeePayRate, employeeRegularHours, employeeOvertimeHours, employeeTotalHours, regularPayAmount, overtimePayAmount, grossPayAmount, federalTaxWithheld, stateTaxWithheld, medicareTaxWithheld, socSecTaxWithheld, totalTaxesWithheld, netPayAmount)
def employeeFullName() :
    """Prompt for and return (first_name, last_name).

    input() already returns str, so the old str() wrappers were redundant.
    """
    employeeFirstName = input("Enter the employee's first name: ")
    employeeLastName = input("Enter the employee's last name: ")
    return employeeFirstName, employeeLastName
def employeePay() :
    """Prompt for and return (hourly_pay_rate, hours_worked) as floats."""
    rate = float(input("Enter the employee's hourly pay rate: "))
    hours = float(input("Enter the employee's hours worked: "))
    return rate, hours
def calculateRegularHours(employeeHoursWorked) :
    """Split hours worked into (regular, overtime) at the 40-hour threshold."""
    if employeeHoursWorked < 40 :
        return employeeHoursWorked, 0
    return 40, employeeHoursWorked - 40
def calculateOvertimeHours(employeeHoursWorked) :
    """Return the hours worked beyond 40; 0 when there is no overtime."""
    if employeeHoursWorked <= 40 :
        return 0
    return employeeHoursWorked - 40
def calculateTotalHours(employeeRegularHours, employeeOvertimeHours) :
    """Total hours = regular + overtime."""
    return employeeRegularHours + employeeOvertimeHours
def calculateRegularPay(employeePayRate, employeeHoursWorked) :
    """Regular pay: rate times hours.

    NOTE(review): main() passes the REGULAR-hours split here despite the
    parameter name saying "hours worked" -- confirm before renaming.
    """
    return employeePayRate * employeeHoursWorked
def calculateOvertimePay(employeePayRate, employeeOvertimeHours) :
    """Overtime pay at time-and-a-half (1.5x the base rate)."""
    return employeePayRate * employeeOvertimeHours * 1.5
def calculateGrossPay(regularPayAmount, overtimePayAmount) :
    """Gross pay = regular + overtime pay."""
    return regularPayAmount + overtimePayAmount
def calculateFederalTax(grossPayAmount) :
    """Federal tax withheld: flat 12.4% of gross pay."""
    return grossPayAmount * 0.124
def calculateStateTax(grossPayAmount) :
    """State tax withheld: flat 4.9% of gross pay."""
    return grossPayAmount * 0.049
def calculateMedicareTax(grossPayAmount) :
    """Medicare tax withheld: flat 1.4% of gross pay."""
    return grossPayAmount * 0.014
def calculateSocSecTax(grossPayAmount) :
    """Social Security tax withheld: flat 6.2% of gross pay."""
    return grossPayAmount * 0.062
def calculateTotalTaxes(federalTaxWithheld, stateTaxWithheld, medicareTaxWithheld, socSecTaxWithheld) :
    """Sum all four withholding amounts."""
    return federalTaxWithheld + stateTaxWithheld + medicareTaxWithheld + socSecTaxWithheld
def calculateNetPay(grossPayAmount, totalTaxesWithheld) :
    """Net pay = gross pay minus total withholdings."""
    return grossPayAmount - totalTaxesWithheld
def payrollSummaryReport(employeeFirstName, employeeLastName, employeePayRate, employeeRegularHours, employeeOvertimeHours, employeeTotalHours, regularPayAmount, overtimePayAmount, grossPayAmount, federalTaxWithheld, stateTaxWithheld, medicareTaxWithheld, socSecTaxWithheld, totalTaxesWithheld, netPayAmount) :
    """Print a one-employee payroll summary table to stdout.

    The pay-rate and the four individual tax arguments are accepted for
    interface compatibility but are not shown in the table.
    """
    header_fmt = "%-12s%-12s%-8s%-10s%-10s%-12s%-10s%-11s%-13s%-10s"
    row_fmt = "%-12s%-12s%-8.2f%-10.2f%-10.2f$%-11.2f$%-9.2f$%-10.2f$%-12.2f$%-10.2f"
    print()
    print("\t\t\t\t\t\tPayroll Summary Report")
    print()
    print(header_fmt % ("LastName", "FirstName", "Hours", "RegHours", "OTHours", "RegPay", "OTPay", "GrossPay", "Deductions", "NetPay"))
    print(row_fmt % (employeeLastName, employeeFirstName, employeeTotalHours, employeeRegularHours, employeeOvertimeHours, regularPayAmount, overtimePayAmount, grossPayAmount, totalTaxesWithheld, netPayAmount))
main ()
The CSV file (employees.csv) I need to use looks like this:
First,Last,Hours,Pay
Matthew,Hightower,42,10
Samuel,Jackson,53,12.65
Catherine,Jones,35,19.43
Charlton,Heston,52,10
Karen,Black,40,12
Sid,Caesar,38,15
George,Kennedy,25,35
Linda,Blair,42,18.6
Beverly,Garland,63,10
Jerry,Stiller,52,15
Efrem,Zimbalist,34,16
Linda,Harrison,24,14
Erik,Estrada,41,15.5
Myrna,Loy,40,14.23

You can treat your .csv file as a regular text file, so there is no need for csv.reader here. Here is a function that could process your file:
def get_data(fname):
    """Read the employees CSV (header: First,Last,Hours,Pay) and return a dict
    keyed by row index:

        { 0: {"fname": ..., "lname": ..., "hours": ..., "regular": ...,
              "overtime": ..., "regpay": ..., "otpay": ..., "gross": ...,
              "deductions": ..., "net": ...},
          1: {...}, ... }

    All payroll figures are computed with the calculate* helpers defined
    alongside this function.  (The original was pseudo-code and read keys
    such as "regular" that were never assigned -> KeyError.)
    """
    result = {}
    with open(fname, 'r') as f:
        # [1:] skips the "First,Last,Hours,Pay" header row
        for i, line in enumerate(f.readlines()[1:]):
            fields = line.strip().split(",")
            record = {}
            record["fname"] = fields[0]
            record["lname"] = fields[1]
            hours = float(fields[2])
            rate = float(fields[3])
            record["regular"], record["overtime"] = calculateRegularHours(hours)
            record["hours"] = calculateTotalHours(record["regular"], record["overtime"])
            record["regpay"] = calculateRegularPay(rate, record["regular"])
            record["otpay"] = calculateOvertimePay(rate, record["overtime"])
            record["gross"] = calculateGrossPay(record["regpay"], record["otpay"])
            record["deductions"] = calculateTotalTaxes(
                calculateFederalTax(record["gross"]),
                calculateStateTax(record["gross"]),
                calculateMedicareTax(record["gross"]),
                calculateSocSecTax(record["gross"]),
            )
            record["net"] = calculateNetPay(record["gross"], record["deductions"])
            result[i] = record
    return result
There are several things you might want to do with your payrollSummaryReport(...) function to improve it:
replace your huge argument list with dict, or list
tinker it a bit to fit your requirements
You might make your improvements in this way:
def payrollSummaryReport(vals) :
    """Print the payroll report for every record in *vals*.

    Each vals[i] is expected to carry the keys: fname, lname, hours,
    regular, overtime, regpay, otpay, gross, deductions, net.
    NOTE(review): keep these key names in sync with get_data().

    Fixes the original, whose row tuple ended in a placeholder string
    (''' repeat for all fields ''') and therefore raised a TypeError on
    the %-format, and whose first two columns were swapped vs. the header.
    """
    print()
    print("\t\t\t\t\t\tPayroll Summary Report")
    print()
    print("%-12s%-12s%-8s%-10s%-10s%-12s%-10s%-11s%-13s%-10s" %
          ("LastName", "FirstName", "Hours", "RegHours", "OTHours", "RegPay", "OTPay", "GrossPay", "Deductions", "NetPay"))
    for i in vals:
        rec = vals[i]
        print("%-12s%-12s%-8.2f%-10.2f%-10.2f$%-11.2f$%-9.2f$%-10.2f$%-12.2f$%-10.2f" %
              (rec["lname"], rec["fname"], rec["hours"], rec["regular"], rec["overtime"],
               rec["regpay"], rec["otpay"], rec["gross"], rec["deductions"], rec["net"]))

Related

Read data from txt file, store it, use it for analyzing, write it to the txt file

The task is to read from given txt file the data add the numbers in there to the list[], so that every number in a row will be a element/object in this list. After reading the file created list will be sent to the main().
this list with the objects will be parameters for the def Analyze part in where at the same time
will be found min, max, average and sum.
def lueTiedosto(data):
    """Read one integer per line from L07T4D1.txt, append each to *data*,
    echo them, and return the filled list.

    Bug fixes vs. the original: it returned the loop variable ``element``
    (only the last value, and a NameError on an empty file) instead of
    ``data``, and it did not close the file on error -- ``with`` does now.
    """
    with open("L07T4D1.txt", 'r', encoding="UTF-8") as tiedosto:
        for rivi in tiedosto:
            data.append(int(rivi))
    for element in data:
        print(element)
    print("Tiedosto L07T4D1.txt luettu.")
    return data
The fixed code which works:
def lueTiedosto(data):
    """Prompt for an input file name, append one integer per line to *data*,
    and return the list."""
    Lue = input("Luettavan tiedoston nimi on ''.\n")
    print(f"Anna uusi nimi, enter säilyttää nykyisen: ", end='')
    with open(Lue, 'r', encoding="UTF-8") as tiedosto:
        for rivi in tiedosto:
            data.append(int(rivi))
    print(f"Tiedosto '{Lue}' luettu.")
    return data
Making an assumption that your input file is similar to the following:
10000
12345
10008
12000
I would do the following:
filepath = r".......\L07T4D1.txt" # Path to file being loaded
def readData(filepath: str) -> list[int]:
    """Read integers (first space-separated token per line) from *filepath*.

    Reading stops at the first blank line, matching the original while-loop.
    """
    values: list[int] = []
    with open(filepath, 'r') as fh:
        for raw in fh:
            line = raw.strip()
            if not line:
                break
            values.append(int(line.split(' ')[0]))
    return values
def analyze(data: list[int]) -> None:
    """Print max, min, sum and average of *data*."""
    total = sum(data)
    print(f'Max Value = {max(data)}')
    print(f'Min Value = {min(data)}')
    print(f'Sum Value = {total}')
    print(f'Avg Value = {total/len(data)}')
Running analyze(readData(filepath)) Yields:
Max Value = 12345
Min Value = 10000
Sum Value = 44353
Avg Value = 11088.25

How can I speed these API queries up?

I am feeding a long list of inputs into a function that calls an API to retrieve data. My list contains around 40,000 unique inputs. Currently, the function returns output every 1-2 seconds or so. Quick maths tells me that it would take over 10+ hours before my function is done. I therefore want to speed this process up, but have struggled to find a solution. I am quite a beginner, so threading/pooling is quite difficult for me. I hope someone is able to help me out here.
The function:
import quandl
import datetime
import numpy as np
quandl.ApiConfig.api_key = 'API key here'
def get_data(issue_date, stock_ticker):
    """Fetch annualized volatility, a pre-issue average price and an Amihud
    illiquidity measure for one stock from the Quandl EOD dataset.

    issue_date   -- str in "%d/%m/%Y" format
    stock_ticker -- bare ticker; "EOD/" is prepended before querying

    Returns [stock_vola, stock_price_average, illiquidity_measure], or []
    when Quandl does not know the ticker.
    """
    # Prepend the dataset prefix expected by quandl.get().
    stock_ticker = "EOD/" + stock_ticker
    # Volatility window: -240 to -40 days relative to the issue date.
    date_1 = datetime.datetime.strptime(issue_date, "%d/%m/%Y")
    pricing_date = date_1 + datetime.timedelta(days=-40)  # -40 days of issue date
    volatility_date = date_1 + datetime.timedelta(days=-240)  # -240 days of issue date (-40,-240 range)
    # Check if code exists: if not -> return empty array
    try:
        stock = quandl.get(stock_ticker, start_date=volatility_date, end_date=pricing_date)  # get pricing data
    except quandl.errors.quandl_error.NotFoundError:
        return []
    daily_close = stock['Adj_Close'].pct_change()  # daily returns using adj. close
    stock_vola = np.std(daily_close) * np.sqrt(252)  # annualized volatility
    # Average price window: -12 to -2 days before issue.
    stock_pricing_date = date_1 + datetime.timedelta(days=-2)  # -2 days of issue date
    stock_pricing_date2 = date_1 + datetime.timedelta(days=-12)  # -12 days of issue date
    # NOTE(review): the quandl.get() calls below are NOT protected by the
    # NotFoundError handler above -- a failure here propagates to the caller.
    stock_price = quandl.get(stock_ticker, start_date=stock_pricing_date2, end_date=stock_pricing_date)
    stock_price_average = np.mean(stock_price['Adj_Close'])  # get average price
    # Amihud's liquidity measure window: -120 to -20 days before issue.
    liquidity_pricing_date = date_1 + datetime.timedelta(days=-20)
    liquidity_pricing_date2 = date_1 + datetime.timedelta(days=-120)
    stock_data = quandl.get(stock_ticker, start_date=liquidity_pricing_date2, end_date=liquidity_pricing_date)
    p = np.array(stock_data['Adj_Close'])
    returns = np.array(stock_data['Adj_Close'].pct_change())
    dollar_volume = np.array(stock_data['Adj_Volume'] * p)
    illiq = (np.divide(returns, dollar_volume))
    # NOTE(review): presumably a leftover debug print -- confirm and remove.
    print(np.nanmean(illiq))
    illiquidity_measure = np.nanmean(illiq, dtype=float) * (10 ** 6)  # multiply by 10^6 for expositional purposes
    return [stock_vola, stock_price_average, illiquidity_measure]
I then use a seperate script to select my csv file with the list with rows, each row containing the issue_date, stock_ticker
import function
import csv
import tkinter as tk
from tkinter import filedialog

# Ask the user to pick the input CSV (column 0: ticker, column 1: issue date).
root = tk.Tk()
root.withdraw()
file_path = filedialog.askopenfilename()

result_data = []
# 'with' closes the input file when done (the original leaked the handle);
# newline='' is the documented way to open files for the csv module.
with open(file_path, newline='') as f:
    csv_f = csv.reader(f)
    next(csv_f)  # skip the header row
    for row in csv_f:
        try:
            return_data = function.get_data(row[1], row[0])
        except AttributeError:
            # get_data hit a ticker/date it could not handle; log and go on.
            print(row[0])
            print('\n\n')
            print(row[1])
            continue
        if len(return_data) != 0:  # empty list means unknown ticker
            result_data_loc = [row[1], row[0]]
            result_data_loc.extend(return_data)
            result_data.append(result_data_loc)

# Bug fix: a list is never None, so the old `is not None` check always
# passed; test truthiness so an empty run prints the message instead.
if result_data:
    # Bug fix: the output name was misspelled 'resuls.csv'.
    with open('results.csv', mode='w', newline='') as result_file:
        csv_writer = csv.writer(result_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        csv_writer.writerows(result_data)
else:
    print("No results found!")
It is quite messy, but like I mentioned before, I am definitely a beginner. Speeding this up would greatly help me.

python 3 tab-delimited file adds column file.write

I'm writing string entries to a tab-delimited file in Python 3. The code that I use to save the content is:
savedir = easygui.diropenbox()
savefile = input("Please type the filename (including extension): ")
sep = "\t"
# 'with' guarantees the handle is closed even if a write fails.
with open(os.path.join(savedir, savefile), "w", encoding="utf-8") as file:
    file.write("Number of entities not found: " + str(missing_count) + "\n")
    for entry in entities:
        # entry is [query_string, list_of_records].  Bug fix: the old code
        # looped over `entry` itself, so the first "item" was the query
        # string and sep.join(item[0]) emitted its first letter -- the
        # mystery extra column.  Iterate only the record list, entry[1].
        file.write(entry[0] + "\t")
        for record in entry[1]:
            file.write(sep.join(record))
            file.write("\t")
        file.write("\n")
The file saves properly. There are no errors or warnings sent to the terminal. When I open the file, I find an extra column has been saved to the file.
Query | Extra | Name
Abu-Jamal, Mumia | A | Mumia Abu-Jamal
Anderson, Walter | A | Walter Inglis Anderson
Anderson, Walter | A | Walter Inglis Anderson
I've added vertical bars between the tabs for clarity; they don't normally appear there. As well, I have removed a few columns at the end. The column between the vertical bars is not supposed to be there. The document that is saved to file is longer than three lines. On each line, the extra column is the first letter of the Query column. Hence, we have A's in these three examples.
entry[0] corresponds exactly to the value in the Query column.
sep.join(item[0]) corresponds exactly to columns 3+.
Any idea why I would be getting this extra column?
Edit: I'm adding the full code for this short script.
# =============================================================================
# Code to query DBpedia for named entities.
#
# =============================================================================
import requests
import xml.etree.ElementTree as et
import csv
import os
import easygui
import re
# =============================================================================
# Default return type is XML. Others: json.
# Classes are: Resource (general), Place, Person, Work, Species, Organization
# but don't include resource as one of the
# =============================================================================
def urlBuilder(query, queryClass="unknown", returns=10):
    """Build a DBpedia KeywordSearch lookup URL.

    queryClass: 'place', 'person' or 'org'; anything else leaves the
    QueryClass parameter empty (general search).
    """
    prefix = 'http://lookup.dbpedia.org/api/search/KeywordSearch?'
    # Map the short class names onto the API's QueryClass values.
    class_names = {'place': 'place', 'person': 'person', 'org': 'organization'}
    qClass = 'QueryClass=' + class_names.get(queryClass, '')
    qString = "QueryString=" + str(query)
    qHits = "MaxHits=" + str(returns)
    return prefix + qClass + "&" + qString + "&" + qHits
#takes a xml doc as STRING and returns an array with the name and the URI
#takes a xml doc as STRING and returns an array with the name and the URI
def getdbpRecord(xmlpath):
    """Parse a DBpedia KeywordSearch XML string into [[label, uri, dates], ...].

    A result with an empty description yields "Empty"; otherwise the
    description is scanned for dates via findDates().
    """
    records = []
    for child in et.fromstring(xmlpath):
        desc = child[2].text
        dates = "Empty" if desc is None else findDates(desc)
        records.append([child[0].text, child[1].text, dates])
    return records
#looks for a date with pattern: 1900-01-01 OR 01 January 1900 OR 1 January 1900
#looks for a date with pattern: 1900-01-01 OR 01 January 1900 OR 1 January 1900
def findDates(x):
    """Return all dates found in *x* joined with ';', or "None" when absent."""
    # Bug fix: the pattern is now a raw string -- '\d' in a plain string is
    # an invalid escape sequence (SyntaxWarning on modern Python).
    pattern = re.compile(r'\d{4}-\d{2}-\d{2}|\d{2}\s\w{3,9}\s\d{4}|\d{1}\s\w{3,9}\s\d{4}')
    returns = pattern.findall(x)
    if len(returns) > 0:
        return ";".join(returns)
    else:
        return "None"
#%%
# =============================================================================
# Build and send get requests
# =============================================================================
# ---- main driver: read names, query DBpedia, write a TSV report ----
print("Please select the CSV file that contains your data.")
csvfilename = easygui.fileopenbox("Please select the CSV file that contains your data.")
lookups = []
# NOTE(review): the file handle opened inline here is never closed.
name_list = csv.reader(open(csvfilename, newline=''), delimiter=",")
for name in name_list:
    lookups.append(name)
# Ask the user for the maximum number of hits per query.
temp = input("Specify the maximum number of returns desired: ")
if temp.isdigit():
    maxHits = temp  # NOTE(review): remains a str here; urlBuilder str()s it anyway
else:
    maxHits = 10
queries = []
print("Building queries. Please wait.")
for search in lookups:
    if len(search) == 2:
        # Row with a class column: [name, class]
        queries.append([search[0], urlBuilder(query=search[0], queryClass=search[1], returns=maxHits)])
    else:
        # Row without a class column -- NOTE(review): passes the whole row
        # (a list) as the query; confirm this is intended.
        queries.append([search, urlBuilder(query=search, returns=maxHits)])
responses = []
print("Gathering responses. Please wait.")
for item in queries:
    response = requests.get(item[1])
    data = response.content.decode("utf-8")
    responses.append([item[0], data])
entities = []
missing_count = 0
for item in responses:
    temp = []  # NOTE(review): unused -- presumably left over from an earlier version
    if len(list(et.fromstring(item[1]))) > 0:
        entities.append([item[0], getdbpRecord(item[1])])
    else:
        missing_count += 1
print("There are " + str(missing_count) + " entities that were not found.")
print("Please select the destination folder for the results of the VIAF lookup.")
savedir = easygui.diropenbox("Please select the destination folder for the results of the VIAF lookup.")
savefile = input("Please type the filename (including extension): ")
file = open(os.path.join(savedir, savefile), "w", encoding="utf-8")
file.write("Number of entities not found: " + str(missing_count) + "\n")
sep = "\t"
for entry in entities:
    # entry == [query, records].  NOTE(review): the inner loop iterates over
    # entry itself, so its first "item" is the query string and
    # sep.join(item[0]) writes the query's first letter -- this is the
    # extra column described in the question above.
    file.write(entry[0]+"\t")
    for item in entry:
        file.write(sep.join(item[0]))
        file.write("\t")
    file.write("\n")
file.close()

Never resets list

I am trying to create a calorie counter the standard input goes like this:
python3 calories.txt < test.txt
Inside calories the food is the following format: apples 500
The problem I am having is that whenever I calculate the values for a person, the accumulator list never seems to be reset to empty for the next one.
import sys
# Module-level state shared by the functions below.
food = {}      # food name -> calorie value (str as read from the file)
eaten = {}     # person name -> list of foods they ate
finished = {}  # NOTE(review): never used in the visible code
total = 0      # NOTE(review): never read at module level; calculate() shadows it
#mappings
def calories(x):
    """Load 'name calories' lines from file *x* into the global food dict.

    The last whitespace-separated token is the calorie value; everything
    before it is the (possibly multi-word) food name.
    """
    with open(x, "r") as fh:
        for raw in fh:
            parts = raw.strip().split()
            food[" ".join(parts[:-1])] = parts[-1]
def calculate(x):
    """Print the calorie total for all foods eaten in *x*.

    x maps a person's name to the list of foods they ate; unknown foods
    count as 100 calories.  Bug fix: the original reset the accumulator
    list (``a = []``) *before* ``total = sum(a)``, so the printed total
    was always 0; the debug before/after prints are removed with it.
    """
    a = []
    for keys, values in x.items():
        for c in values:
            try:
                a.append(int(food[c]))
            except (KeyError, ValueError):
                # unknown food (or non-numeric table entry) -> default 100
                a.append(100)
    total = sum(a)  # sum BEFORE any reset -- this was the bug
    print(total)
def main():
    """Load the calorie table from argv[1], then read 'name,food,food,...'
    lines from stdin, recording them in the global eaten dict."""
    calories(sys.argv[1])
    for line in sys.stdin:
        lines = line.strip().split(',')
        # lines[0] is the person; the remaining fields are foods eaten.
        for c in lines:  # NOTE(review): loop looks redundant -- body ignores c
            values = lines[0]
            keys = lines[1:]
            eaten[values] = keys
        # NOTE(review): original indentation was lost; presumably calculate()
        # ran once per input line here -- confirm against the author's intent.
        calculate(eaten)

if __name__ == '__main__':
    main()
Edit - forgot to include what test.txt would look like:
joe,almonds,almonds,blue cheese,cabbage,mayonnaise,cherry pie,cola
mary,apple pie,avocado,broccoli,butter,danish pastry,lettuce,apple
sandy,zuchini,yogurt,veal,tuna,taco,pumpkin pie,macadamia nuts,brazil nuts
trudy,waffles,waffles,waffles,chicken noodle soup,chocolate chip cookie
How to make it easier on yourself:
When reading the calories-data, convert the calories to int() asap, no need to do it every time you want to sum up somthing that way.
Dictionary has a .get(key, defaultvalue) accessor, so if food not found, use 100 as default is a 1-liner w/o try: ... except:
This works for me, not using sys.stdin but supplying the second file as file as well instead of piping it into the program using <.
I modified some parsings to remove whitespaces and return a [(name,cal),...] tuplelist from calc.
May it help you to fix it to your liking:
def calories(x):
    """Read 'name calories' lines from *x*; store int calorie values in the
    global food dict keyed by the (possibly multi-word) food name."""
    with open(x, "r") as fh:
        for raw in fh:
            parts = raw.strip().split()
            name = " ".join(parts[:-1])
            food[name] = int(parts[-1].strip())
def getCal(foodlist, defValueUnknown = 100):
    """Get sum / total calories of a list of ingredients; foods missing from
    the global table count as defValueUnknown (default 100)."""
    total = 0
    for item in foodlist:
        total += food.get(item, defValueUnknown)
    return total
def calculate(x):
    """Return [(name, total_calories), ...] for every person in *x*."""
    return [(name, getCal(foods)) for name, foods in x.items()]
def main():
    """Load the calorie table from argv[1] and the eaten-lists file from
    argv[2], then print the per-person calorie totals."""
    calories(sys.argv[1])
    with open(sys.argv[2]) as f:  # parse as file, not piped in via sys.stdin
        for line in f:
            lines = line.strip().split(',')
            # lines[0] is the person; the rest are foods eaten.
            for c in lines:  # NOTE(review): loop looks redundant -- body ignores c
                values = lines[0].strip()
                keys = [x.strip() for x in lines[1:]]  # ensure no whitespaces in
                eaten[values] = keys
    calced = calculate(eaten)  # calculate after all are read into the dict
    print (calced)
Output:
[('joe', 1400), ('mary', 1400), ('sandy', 1600), ('trudy', 1000)]
Using sys.stdin and piping just lead to my console blinking and waiting for manual input - maybe VS related...

Python script is locked when accessing SQLite database in loop

please watch through the code of my parser. It grabs some statistics from web pages accessing them in a loop and puts specified records in SQLite3 database.
Everything goes fine until line 87 (the SQL statement), where the process consumes all CPU resources and in fact gets blocked.
File "./parser.py", line 86, in
while (j < i):
Database file in the beginning of the code is created with correct structure, so the problem is in loops. Inner block of main loop for season in season_list: works just fine. Here is the whole code of my script:
#!/usr/bin/env python
from bs4 import BeautifulStoneSoup
from urllib2 import urlopen
import re
import sqlite3
from time import gmtime, strftime
# Print start time (Python 2 script).
print "We started at ", strftime("%Y-%m-%d %H:%M:%S", gmtime())

# Create the SQLite database and the results table up front.
print "Trying to create DB"
con = sqlite3.connect('england.db')
cur = con.cursor()
# One row per played match; id_match autoincrements.
sql = """\
CREATE TABLE english_premier_league (
id_match INTEGER PRIMARY KEY AUTOINCREMENT,
season TEXT,
tour INTEGER,
date TEXT,
home TEXT,
visitor TEXT,
home_score INTEGER,
visitor_score INTEGER
);
"""
try:
    cur.executescript(sql)
except sqlite3.DatabaseError as err:
    print "Error creating database: ", err
else:
    print "Succesfully created your database..."
# NOTE(review): indentation was lost in extraction -- commit presumably ran
# unconditionally here; harmless either way for DDL via executescript.
con.commit()
# Close; the main loop below reopens the DB once per season.
cur.close()
con.close()
# Build the list of season codes, newest first: 2011 -> "201112", ...,
# 1999 -> "199900" (second year is zero-padded to two digits).
postfix = 2011    # newest season start year
threshold = 1999  # oldest season start year
season_list = []
while postfix >= threshold:
    end = (postfix + 1) % 2000
    if (end >= 10):
        season = str(postfix) + str(end)
    else:
        # pad a single-digit end year with a leading zero
        season = str(postfix) + str(0) + str(end)
    season_list.append(season)
    postfix -= 1
print season_list
# main loop
for season in season_list:
href = 'http://www.stat-football.com/en/a/eng.php?b=10&d='+season+'&c=51'
print href
xml = urlopen(href).read()
xmlSoup = BeautifulStoneSoup(xml)
tablet = xmlSoup.find(attrs={"class" : "bd5"})
#Access DB
con = sqlite3.connect('england.db')
cur = con.cursor()
#Parse site
tour = tablet.findAll(attrs = { "class" : re.compile(r"^(s3|cc s3)$") })
date = tablet.findAll(text = re.compile(r"(0[1-9]|[12][0-9]|3[01])\.(0[1-9]|1[012])\.(19|20)\d\d"))
home = tablet.findAll(attrs = {"class" : "nw"})
guest = tablet.findAll(attrs = {"class" : "s1"})
score = tablet.findAll(attrs = {"class" : "nw pr15"})
#
def parse_string(sequence):
result=[]
for unit in sequence:
text = ''.join(unit.findAll(text=True))
result.append(text.strip())
return result
tour_list=parse_string(tour)
home_list=parse_string(home)
guest_list=parse_string(guest)
score_list=parse_string(score)
#Loop over found records to put them into sqlite3 DB
i = len(tour_list)
j = 0
while (j < i):
sql_add = 'INSERT INTO english_premier_league (season, tour, date, home, visitor, home_score, visitor_score) VALUES (?, ?, ?, ?, ?, ?, ?)'
match = (season, int(tour_list[j]), date[j], home_list[j], guest_list[j], int(score_list[j][0:1]), int(score_list[j][2:3]))
try:
cur.executemany(sql_add, match)
except sqlite3.DatabaseError as err:
print "Error matching the record: ", err
else:
con.commit()
part = float(j)/float(i)*100
if (part%10 == 0):
print (int(part)), "%"
j += 1
cur.close()
con.close()
Also it may be useful to look at the end of strace output:
getcwd("/home/vitaly/football_forecast/epl", 512) = 35
stat("/home/vitaly/football_forecast/epl/england.db",
{st_mode=S_IFREG|0644, st_size=24576, ...}) = 0
open("/home/vitaly/football_forecast/epl/england.db", O_RDWR|O_CREAT,
0644) = 3 fcntl(3, F_GETFD) = 0 fcntl(3,
F_SETFD, FD_CLOEXEC) = 0 fstat(3, {st_mode=S_IFREG|0644,
st_size=24576, ...}) = 0 lseek(3, 0, SEEK_SET) = 0
read(3, "SQLite format 3\0\4\0\1\1\0# \0\0\1~\0\0\0\30"..., 100) =
100
I'm running Python 2.7 on Ubuntu 12.04. Thanks a lot.
Replace cur.executemany(sql_add, match) with cur.execute(sql_add, match). executemany() is used for performing the same operation multiple times over an iterable of values. For example, if you had this:
match = [ (season1, tour1, date1, home1, visitor1, home_score1, visitor_score1),
(season2, tour2, date2, home2, visitor2, home_score2, visitor_score2),
(season3, tour3, date3, home3, visitor3, home_score3, visitor_score3) ]
cur.executemany(sql_add, match)
... it would be appropriate, since the cursor could iterate over the tuples in match and perform the insert operation on each of them.

Resources