Any idea how the id is generated in the database? - python-3.x

I can't understand how the id is automatically generated in the database. I thought it was supposed to have "autoincrement" specified when the id column was added to the "Pages" table.
Also, can anyone explain this code block? I could not understand the part "if not found : continue":
for web in webs:
    if ( href.startswith(web) ) :
        found = True
        break
if not found : continue
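For reference, here is a minimal standalone sketch of that pattern (the webs and links values are made-up sample data, not taken from the script): the inner loop sets the found flag as soon as the link matches one of the allowed site prefixes, and "if not found : continue" then jumps to the next link of the enclosing loop, so links pointing outside those sites are simply skipped.
# Hypothetical sample data, only to illustrate the flag-and-continue pattern
webs = ['http://www.dr-chuck.com']
links = ['http://www.dr-chuck.com/csev-blog', 'https://twitter.com/drchuck']

for href in links:
    found = False
    for web in webs:
        if href.startswith(web):
            found = True   # href belongs to one of the crawled sites
            break          # no need to test the remaining prefixes
    if not found: continue     # skip links that point outside the crawled sites
    print('would keep', href)  # only in-site links reach this point
The full script in question is below.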
import sqlite3
import urllib.error
import ssl
from urllib.parse import urljoin
from urllib.parse import urlparse
from urllib.request import urlopen
from bs4 import BeautifulSoup
# Ignore SSL certificate errors
ctx = ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE
conn = sqlite3.connect('spider.sqlite')
cur = conn.cursor()
cur.execute('''CREATE TABLE IF NOT EXISTS Pages
    (id INTEGER PRIMARY KEY, url TEXT UNIQUE, html TEXT,
     error INTEGER, old_rank REAL, new_rank REAL)''')

cur.execute('''CREATE TABLE IF NOT EXISTS Links
    (from_id INTEGER, to_id INTEGER, UNIQUE(from_id, to_id))''')

cur.execute('''CREATE TABLE IF NOT EXISTS Webs (url TEXT UNIQUE)''')

# Check to see if we are already in progress...
cur.execute('SELECT id,url FROM Pages WHERE html is NULL and error is NULL ORDER BY RANDOM() LIMIT 1')
row = cur.fetchone()
if row is not None:
    print("Restarting existing crawl. Remove spider.sqlite to start a fresh crawl.")
else :
    starturl = input('Enter web url or enter: ')
    if ( len(starturl) < 1 ) : starturl = 'http://www.dr-chuck.com/'
    if ( starturl.endswith('/') ) : starturl = starturl[:-1]
    web = starturl
    if ( starturl.endswith('.htm') or starturl.endswith('.html') ) :
        pos = starturl.rfind('/')
        web = starturl[:pos]
    if ( len(web) > 1 ) :
        cur.execute('INSERT OR IGNORE INTO Webs (url) VALUES ( ? )', ( web, ) )
        cur.execute('INSERT OR IGNORE INTO Pages (url, html, new_rank) VALUES ( ?, NULL, 1.0 )', ( starturl, ) )
        conn.commit()

# Get the current webs
cur.execute('''SELECT url FROM Webs''')
webs = list()
for row in cur:
    webs.append(str(row[0]))
print(webs)

many = 0
while True:
    if ( many < 1 ) :
        sval = input('How many pages:')
        if ( len(sval) < 1 ) : break
        many = int(sval)
    many = many - 1

    cur.execute('SELECT id,url FROM Pages WHERE html is NULL and error is NULL ORDER BY RANDOM() LIMIT 1')
    try:
        row = cur.fetchone()
        # print row
        fromid = row[0]
        url = row[1]
    except:
        print('No unretrieved HTML pages found')
        many = 0
        break
    print(fromid, url, end=' ')

    # If we are retrieving this page, there should be no links from it
    cur.execute('DELETE from Links WHERE from_id=?', (fromid, ) )
    try:
        document = urlopen(url, context=ctx)
        html = document.read()
        if document.getcode() != 200 :
            print("Error on page: ",document.getcode())
            cur.execute('UPDATE Pages SET error=? WHERE url=?', (document.getcode(), url) )
        if 'text/html' != document.info().get_content_type() :
            print("Ignore non text/html page")
            cur.execute('DELETE FROM Pages WHERE url=?', ( url, ) )
            conn.commit()
            continue
        print('('+str(len(html))+')', end=' ')
        soup = BeautifulSoup(html, "html.parser")
    except KeyboardInterrupt:
        print('')
        print('Program interrupted by user...')
        break
    except:
        print("Unable to retrieve or parse page")
        cur.execute('UPDATE Pages SET error=-1 WHERE url=?', (url, ) )
        conn.commit()
        continue

    cur.execute('INSERT OR IGNORE INTO Pages (url, html, new_rank) VALUES ( ?, NULL, 1.0 )', ( url, ) )
    cur.execute('UPDATE Pages SET html=? WHERE url=?', (memoryview(html), url ) )
    conn.commit()

    # Retrieve all of the anchor tags
    tags = soup('a')
    count = 0
    for tag in tags:
        href = tag.get('href', None)
        if ( href is None ) : continue
        # Resolve relative references like href="/contact"
        up = urlparse(href)
        if ( len(up.scheme) < 1 ) :
            href = urljoin(url, href)
        ipos = href.find('#')
        if ( ipos > 1 ) : href = href[:ipos]
        if ( href.endswith('.png') or href.endswith('.jpg') or href.endswith('.gif') ) : continue
        if ( href.endswith('/') ) : href = href[:-1]
        # print href
        if ( len(href) < 1 ) : continue

        # Check if the URL is in any of the webs
        found = False
        for web in webs:
            if ( href.startswith(web) ) :
                found = True
                break
        if not found : continue

        cur.execute('INSERT OR IGNORE INTO Pages (url, html, new_rank) VALUES ( ?, NULL, 1.0 )', ( href, ) )
        count = count + 1
        conn.commit()

        cur.execute('SELECT id FROM Pages WHERE url=? LIMIT 1', ( href, ))
        try:
            row = cur.fetchone()
            toid = row[0]
        except:
            print('Could not retrieve id')
            continue
        # print fromid, toid
        cur.execute('INSERT OR IGNORE INTO Links (from_id, to_id) VALUES ( ?, ? )', ( fromid, toid ) )
    print(count)

cur.close()
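On the id question: in SQLite, a column declared INTEGER PRIMARY KEY is an alias for the table's built-in rowid, so the database assigns the next id by itself whenever an INSERT does not supply one. That is exactly what happens with the INSERT OR IGNORE INTO Pages (url, html, new_rank) statements above, which never mention id. A minimal sketch demonstrating this against a throwaway in-memory database:
import sqlite3

# Throwaway in-memory database, just to show ids being assigned automatically
conn = sqlite3.connect(':memory:')
cur = conn.cursor()
cur.execute('CREATE TABLE Pages (id INTEGER PRIMARY KEY, url TEXT UNIQUE)')
cur.execute('INSERT OR IGNORE INTO Pages (url) VALUES (?)', ('http://example.com/a',))
cur.execute('INSERT OR IGNORE INTO Pages (url) VALUES (?)', ('http://example.com/b',))
conn.commit()
for row in cur.execute('SELECT id, url FROM Pages'):
    print(row)   # (1, 'http://example.com/a') and (2, 'http://example.com/b')
conn.close()
The explicit AUTOINCREMENT keyword is only needed when you must prevent rowids from ever being reused after deletes; plain INTEGER PRIMARY KEY already auto-assigns ids.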

Related

Failed to Retrieve HTTP Error 401: Unauthorized

I'm running a Python script to connect to the Twitter API and count my friends. When I run it I get the 401 error in the title. Any help would be appreciated.
import urllib.request, urllib.parse, urllib.error
import twurl
import json
import sqlite3
import ssl

TWITTER_URL = 'https://api.twitter.com/1.1/friends/list.json'

conn = sqlite3.connect('friends.sqlite')
cur = conn.cursor()

cur.execute('''CREATE TABLE IF NOT EXISTS People
    (id INTEGER PRIMARY KEY, name TEXT UNIQUE, retrieved INTEGER)''')
cur.execute('''CREATE TABLE IF NOT EXISTS Follows
    (from_id INTEGER, to_id INTEGER, UNIQUE(from_id, to_id))''')

# Ignore SSL certificate errors
ctx = ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE

while True:
    acct = input('Enter a Twitter account, or quit: ')
    if (acct == 'quit'): break
    if (len(acct) < 1):
        cur.execute('SELECT id, name FROM People WHERE retrieved=0 LIMIT 1')
        try:
            (id, acct) = cur.fetchone()
        except:
            print('No unretrieved Twitter accounts found')
            continue
    else:
        cur.execute('SELECT id FROM People WHERE name = ? LIMIT 1',
                    (acct, ))
        try:
            id = cur.fetchone()[0]
        except:
            cur.execute('''INSERT OR IGNORE INTO People
                (name, retrieved) VALUES (?, 0)''', (acct, ))
            conn.commit()
            if cur.rowcount != 1:
                print('Error inserting account:', acct)
                continue
            id = cur.lastrowid

    url = twurl.augment(TWITTER_URL, {'screen_name': acct, 'count': '100'})
    print('Retrieving account', acct)
    try:
        connection = urllib.request.urlopen(url, context=ctx)
    except Exception as err:
        print('Failed to Retrieve', err)
        break

    data = connection.read().decode()
    headers = dict(connection.getheaders())

    print('Remaining', headers['x-rate-limit-remaining'])

    try:
        js = json.loads(data)
    except:
        print('Unable to parse json')
        print(data)
        break

    # Debugging
    # print(json.dumps(js, indent=4))

    if 'users' not in js:
        print('Incorrect JSON received')
        print(json.dumps(js, indent=4))
        continue

    cur.execute('UPDATE People SET retrieved=1 WHERE name = ?', (acct, ))

    countnew = 0
    countold = 0
    for u in js['users']:
        friend = u['screen_name']
        print(friend)
        cur.execute('SELECT id FROM People WHERE name = ? LIMIT 1',
                    (friend, ))
        try:
            friend_id = cur.fetchone()[0]
            countold = countold + 1
        except:
            cur.execute('''INSERT OR IGNORE INTO People (name, retrieved)
                VALUES (?, 0)''', (friend, ))
            conn.commit()
            if cur.rowcount != 1:
                print('Error inserting account:', friend)
                continue
            friend_id = cur.lastrowid
            countnew = countnew + 1
        cur.execute('''INSERT OR IGNORE INTO Follows (from_id, to_id)
            VALUES (?, ?)''', (id, friend_id))

    print('New accounts=', countnew, ' revisited=', countold)
    print('Remaining', headers['x-rate-limit-remaining'])
    conn.commit()

cur.close()
Also, I am running this in Visual Studio Code in case that matters.
I tried to cd into the proper folder, since at first Visual Studio Code was running the code from a different folder and I thought it didn't have the files it needed to import, etc. I was expecting the files to be found so I could run the program correctly; I assume right now I am having trouble with the OAuth.
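A 401 from that endpoint normally means the OAuth signature on the request was rejected, not that a local file is missing. Assuming twurl here is the helper module from the Python for Everybody materials, it signs TWITTER_URL with four keys read from a hidden.py file placed next to the script; a sketch of what that file is expected to look like (the values are placeholders to replace with your own app's credentials):
# hidden.py -- sketch, assuming the py4e-style twurl/oauth helpers are in use.
# Keep this file next to the script and fill in your own keys and tokens.
def oauth():
    return {"consumer_key": "your-consumer-key",
            "consumer_secret": "your-consumer-secret",
            "token_key": "your-access-token",
            "token_secret": "your-access-token-secret"}
If those keys are missing, mistyped, or revoked, or the app does not have the access level this v1.1 endpoint requires (Twitter/X has restricted much of the old v1.1 API), the server answers 401 regardless of which folder the script is run from.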

how to work with foreign key field in django

Hi everyone, I am working with the Django framework, where I upload an Excel file into the DailyTrip table. Currently I get car_number directly from the Car table, but now I need to get car_number through the Car_team table together with team_id (Car_team stores car_id and team_id). I also need to store team_id in the DailyTrip table automatically, based on car_id (car_number). I am very confused about how to do this; please help me out.
models.py
class Car_team(BaseModel):
    team = models.ForeignKey(
        Team,
        models.CASCADE,
        verbose_name='Team',
        null=True,
    )
    car = models.ForeignKey(
        Car,
        models.CASCADE,
        verbose_name='Car',
        null=True)
    city = models.ForeignKey(
        City,
        models.CASCADE,
        verbose_name='City',
    )
    start_date = models.DateField(null=True, blank=True)
    end_date = models.DateField(null=True, blank=True)
views.py
def add_payout_uber_daily_data(request):
    if request.method == 'POST':
        form = UberPerformanceDataForm(request.POST, request.FILES, request=request)
        if form.is_valid():
            date = form.cleaned_data['date']
            excel_file = request.FILES['file']
            df = pd.read_excel(excel_file)
            is_na = pd.isna(df['Date']).sum().sum() + pd.isna(df['Name']).sum().sum() + pd.isna(df['UUID']).sum().sum() + pd.isna(df['Net Fare With Toll']).sum().sum() + pd.isna(df['Trips']).sum().sum() + pd.isna(df['Uber KMs']).sum().sum() + pd.isna(df['CashCollected']).sum().sum() + pd.isna(df['UberToll']).sum().sum() + pd.isna(df['Tips']).sum().sum() + pd.isna(df['Hours Online']).sum().sum() + pd.isna(df['Ratings']).sum().sum() + pd.isna(df['Acceptance Rate']).sum().sum() + pd.isna(df['Cancellation Rate']).sum().sum()
            error_list = []
            if is_na > 0:
                error_list.append('Found #N/A or blank values in the sheet. Please correct and re-upload')
                context = {'error_list': error_list, 'menu_payout': 'active', 'submenu_daily_data': 'active', 'form': form, }
                return render(request, 'add_payout_uber_daily_data.html', context=context)
            date_match = True
            for d in df['Date']:
                if str(d.strftime("%Y-%m-%d")) != str(date):
                    date_match = False
                    break
            if not date_match:
                error_list.append('Some dates are not matching in excel')
            if len(error_list) > 0:
                context = {'error_list': error_list, 'menu_payout': 'active', 'submenu_daily_data': 'active', 'form': form, }
                return render(request, 'add_payout_uber_daily_data.html', context=context)
            DailyTrip.objects.filter(date=date).update(is_active=0)
            for i in df.index:
                uuid = df['UUID'][i]
                driver_id = None
                car_id = None
                fleet_id = None
                manager_id = None
                try:
                    driver = Driver.objects.get(uber_uuid=uuid)
                    driver_id = driver.id
                except Driver.DoesNotExist:
                    driver_id = None
                # This is the logic that needs to change: car_number, car_id and team_id
                # should be resolved through Car_team (via the foreign key) instead of
                # reading car_number straight from the Car table.
                try:
                    car = Car.objects.get(car_number=df["Car Number"][i])
                    car_id = car.id
                    manager_id = car.manager_id
                except Car.DoesNotExist:
                    car_id = None
                try:
                    fleet = Fleet.objects.get(name=df["Fleet Name"][i])
                    fleet_id = fleet.id
                except Fleet.DoesNotExist:
                    fleet_id = None
                name = df['Name'][i]
                car_number = df['Car Number'][i]
                fare_total = df['Net Fare With Toll'][i]
                trips = df['Trips'][i]
                pool_trips = 0
                hours_online = df['Hours Online'][i]
                total_km = df['Uber KMs'][i]
                cash_collected = abs(df['CashCollected'][i])
                toll = df['UberToll'][i]
                tip_amount = df['Tips'][i]
                fare_avg = float(fare_total)/int(trips)
                fare_per_hour_online = float(fare_total)/float(hours_online)
                fare_per_km = fare_total/total_km
                trips_per_hour = trips/hours_online
                km_per_trip = total_km/trips
                rating = df['Ratings'][i]
                acceptance_rate_perc = float(df['Acceptance Rate'][i])/100
                driver_cancellation_rate = float(df['Cancellation Rate'][i])/100
                obj, created = DailyTrip.all_objects.update_or_create(
                    date=date, uuid=uuid,
                    defaults={
                        'car_id': car_id,
                        'manager_id': manager_id,
                        'car_number': car_number,
                        'driver_id': driver_id,
                        'fleet_id': fleet_id,
                        'driver_name': name,
                        'fare_total': fare_total,
                        'trips': trips,
                        'pool_trips': pool_trips,
                        'hours_online': hours_online,
                        'total_km': total_km,
                        'cash_collected': cash_collected,
                        'toll': toll,
                        'tip_amount': tip_amount,
                        'fare_avg': fare_avg,
                        'fare_per_hour_online': fare_per_hour_online,
                        'fare_per_km': fare_per_km,
                        'trips_per_hour': trips_per_hour,
                        'km_per_trip': km_per_trip,
                        'rating': rating,
                        'acceptance_rate_perc': acceptance_rate_perc,
                        'driver_cancellation_rate': driver_cancellation_rate,
                        'is_active': 1,
                        'comments': None}
                )
            if len(error_list) > 0:
                DailyTrip.objects.filter(date=date).update(is_active=0)
                context = {'error_list': error_list, 'menu_payout': 'active', 'submenu_daily_data': 'active', 'form': form, }
                return render(request, 'add_payout_uber_daily_data.html', context=context)
            else:
                messages.success(request, 'Daily Trips added Successfully...')
                return redirect('/fleet/payout/daily_data/add/uber')
    else:
        form = UberPerformanceDataForm(initial={})
    context = {
        'menu_payout': 'active',
        'submenu_daily_data': 'active',
        'form': form,
    }
    return render(request, 'add_payout_uber_daily_data.html', context=context)
You can try this: to get car_number via Car_team -->
car_team = Car_team.objects.all().last()  # to get the last car_team, for example
car_number = car_team.car.car_number      # to get the car number from the car_team
try:
    car = Car.objects.get(car_number=df["Car Number"][i])
    car_id = car.id
    car1 = Car_team.objects.filter(car_id=car_id)
    if car1:
        team_id = car1[0].team_id
    else:
        team_id = None
except Car.DoesNotExist:
    car_id = None
    team_id = None
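If the resolved team also has to be stored on the trip, that value can then be passed into the existing update_or_create call. This is only a sketch, since it assumes DailyTrip actually defines a ForeignKey named team:
# Sketch: 'team_id' assumes DailyTrip has a ForeignKey field named 'team' (hypothetical).
obj, created = DailyTrip.all_objects.update_or_create(
    date=date, uuid=uuid,
    defaults={
        'car_id': car_id,
        'team_id': team_id,        # team resolved through Car_team above
        'car_number': car_number,
        # ...keep the remaining defaults exactly as in the original view...
    }
)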

how to fetch data stored in sqlite3 database and assign it to the variables in tkinter python

In the code below I saved value1 and value2 to the sqlite3 database, and txt_ to a folder named data.
What I am trying to achieve is that when I rerun the program and open the file, the txt_ file should open in the text area with the lines I added when I saved it. And when I click the Add button, value1 and value2 should be updated and the newly created line should appear on the next line.
Let me know if my method is correct; if not, please tell me a better one.
CODE:
from tkinter import *
from tkinter import messagebox
import sqlite3
import os

root = Tk()
root.geometry('400x400')

var_e = StringVar(None)

def create_my_db():
    conn = sqlite3.connect(database=r'my db.db')
    cur = conn.cursor()
    cur.execute("""CREATE TABLE IF NOT EXISTS "myLogs"
                   (
                       "int_value" INTEGER,
                       "float_value" REAL
                   )
                """)
    conn.commit()

create_my_db()

def add_lbl():
    global value1, value2
    value1 += 1
    value2 += 1
    sample = f'This is line {value1} which has value of {value2}\n'
    txt_.insert(END, sample)

def save():
    conn = sqlite3.connect(database=r'my db.db')
    cur = conn.cursor()
    cur.execute("""INSERT INTO myLogs VALUES (?,?)""",
                (
                    value1,
                    value2
                )
                )
    conn.commit()
    # labels to check if the values are stored in the database
    values_lbl.config(text=f'value 1 is [ {value1} ] & value 2 is [ {value2} ]')

def save_txt():
    file_txt = open(f'data/{value1}.txt', 'w')
    file_txt.write(txt_.get(1.0, END))
    file_txt.close()
    messagebox.showinfo('SAVED', 'Data saved to the database.')

def open_():
    for txt_file in os.listdir("data/"):
        if txt_file.split('.')[0] == f'{var_e.get()}':
            file_ = open(f"data/{txt_file}", "r")
            for i in file_:
                txt_.insert(END, i)
            file_.close()

value1 = 0
value2 = 0.9

values_lbl = Label(root, text=f'value 1 is [ {value1} ] & value 2 is [ {value2} ]')
values_lbl.pack()

btn_frame = Frame(root)
btn_frame.pack()

btn_add = Button(btn_frame, text='Add', command=add_lbl)
btn_add.pack(side=LEFT)

e = Entry(btn_frame, textvariable=var_e)
e.pack(side=LEFT)

btn_open = Button(btn_frame, text='Open', command=open_)
btn_save = Button(btn_frame, text='Save', command=lambda:[save(), save_txt()])
btn_open.pack(side=LEFT)
btn_save.pack(side=LEFT)

txt_ = Text(root)
txt_.pack(fill=BOTH, expand=True)

root.mainloop()
When I posted this question I didn't know how to run the query to update value1 and value2, which is why I didn't mention the query in the open_() function. I have since figured out how that query should be done, so in the code below I added it to the open_() function. Now the complete program runs fine.
def open_():
    global value1, value2
    txt_.delete(1.0, END)
    for txt_file in os.listdir("data/"):
        if txt_file.split('.')[0] == f'{var_e.get()}':
            file_ = open(f"data/{txt_file}", "r")
            for i in file_:
                txt_.insert(END, i)
            file_.close()
    conn = sqlite3.connect(database=r'my db.db')
    cur = conn.cursor()
    cur.execute("""SELECT * FROM myLogs WHERE int_value=?""", (var_e.get(),))
    row = cur.fetchone()
    if row is None:
        messagebox.showerror("ERROR", 'Invalid input.')
    else:
        value1 = row[0]
        value2 = row[1]
    conn.commit()
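One design note on the save() shown earlier: it always INSERTs, so saving the same file twice stores a duplicate row in myLogs. Here is a sketch of an update-or-insert variant, under the assumption that one row per int_value is what is wanted:
def save():
    conn = sqlite3.connect(database=r'my db.db')
    cur = conn.cursor()
    # Update the row for this value1 if it already exists, otherwise insert a new one.
    cur.execute("UPDATE myLogs SET float_value=? WHERE int_value=?", (value2, value1))
    if cur.rowcount == 0:
        cur.execute("INSERT INTO myLogs VALUES (?,?)", (value1, value2))
    conn.commit()
    conn.close()
    values_lbl.config(text=f'value 1 is [ {value1} ] & value 2 is [ {value2} ]')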

Unable to scrape all data

from bs4 import BeautifulSoup
import requests, sys, os
import pandas as pd

URL = r"https://www.vault.com/best-companies-to-work-for/law/top-100-law-firms-rankings/year/"
My_list = ['2007','2008','2009','2010','2011','2012','2013','2014','2015','2016','2017','2018','2019','2020']

Year = []
CompanyName = []
Rank = []
Score = []

print('\n>>Process started please wait\n\n')

for I, Page in enumerate(My_list, start=1):
    url = r'https://www.vault.com/best-companies-to-work-for/law/top-100-law-firms-rankings/year/{}'.format(Page)
    print('\nData fetching from : ', url)
    Res = requests.get(url)
    soup = BeautifulSoup(Res.content, 'html.parser')
    data = soup.find('section', {'class': 'search-result CompanyWorkfor RankingMain FindSchools school-results contrastSection d-flex justify-content-center min-height Rankings CompRank'})
    if len(soup) > 0:
        print("\n>>Getting page source for :", url)
    else:
        print("Please Check url :", url)
    for i, item in enumerate(data.find_all("div", {"class": "RankItem"})):
        year = item.find("i", {"class": "fa-stack fa-2x"})
        Year.append(year)
        title = item.find("h3", {"class": "MainLink"}).get_text().strip()
        CompanyName.append(title)
        rank = item.find("div", {"class": "RankNumber"}).get_text().strip()
        Rank.append(rank)
        score = item.find("div", {"class": "score"}).get_text().strip()
        Score.append(score)

Data = pd.DataFrame({"Year": Year, "CompanyName": CompanyName, "Rank": Rank, "Score": Score})
Data[['First','Score']] = Data.Score.str.split(" ", expand=True,)
Data[['hash','Rank']] = Data.Rank.str.split("#", expand=True,)
Data.drop(columns=['hash','First'], inplace=True)
Data.to_csv('Vault_scrap.csv', index=False)
For each URL the expected output for year, rank, title and score is 100 rows, but I'm getting only 10 rows.
You can iterate through the years and pages like this.
import requests
import pandas as pd

url = 'https://www.vault.com/vault/api/Rankings/LoadMoreCompanyRanksJSON'

def page_loop(year, url):
    tableReturn = pd.DataFrame()
    for page in range(1, 101):
        payload = {
            'rank': '2',
            'year': year,
            'category': 'LBACCompany',
            'pg': page}
        jsonData = requests.get(url, params=payload).json()
        if jsonData == []:
            return tableReturn
        else:
            print('page: %s' % page)
            tableReturn = tableReturn.append(pd.DataFrame(jsonData), sort=True).reset_index(drop=True)
    return tableReturn

results = pd.DataFrame()
for year in range(2007, 2021):
    print("\n>>Getting page source for :", year)
    jsonData = page_loop(year, url)
    results = results.append(pd.DataFrame(jsonData), sort=True).reset_index(drop=True)
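One caveat if this answer is run on a recent pandas release: DataFrame.append was deprecated and has since been removed, so the same accumulation is usually written with pd.concat instead. A sketch of the outer loop in that style (page_loop would need the same change internally):
# Sketch: same accumulation as above, using pd.concat instead of DataFrame.append
results = pd.DataFrame()
for year in range(2007, 2021):
    print("\n>>Getting page source for :", year)
    jsonData = page_loop(year, url)
    results = pd.concat([results, pd.DataFrame(jsonData)], sort=True).reset_index(drop=True)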

Python script is locked when accessing SQLite database in loop

Please look through the code of my parser. It grabs some statistics from web pages, accessing them in a loop, and puts the specified records into an SQLite3 database.
Everything goes right until line 87 (the SQL statement), where the process consumes all CPU resources and in fact gets blocked:
File "./parser.py", line 86, in
    while (j < i):
The database file is created with the correct structure at the beginning of the code, so the problem is in the loops. The inner block of the main loop (for season in season_list:) works just fine. Here is the whole code of my script:
#!/usr/bin/env python
from bs4 import BeautifulStoneSoup
from urllib2 import urlopen
import re
import sqlite3
from time import gmtime, strftime

# Print start time
print "We started at ", strftime("%Y-%m-%d %H:%M:%S", gmtime())

# Create DB
print "Trying to create DB"
con = sqlite3.connect('england.db')
cur = con.cursor()
sql = """\
CREATE TABLE english_premier_league (
    id_match INTEGER PRIMARY KEY AUTOINCREMENT,
    season TEXT,
    tour INTEGER,
    date TEXT,
    home TEXT,
    visitor TEXT,
    home_score INTEGER,
    visitor_score INTEGER
);
"""
try:
    cur.executescript(sql)
except sqlite3.DatabaseError as err:
    print "Error creating database: ", err
else:
    print "Succesfully created your database..."
con.commit()
cur.close()
con.close()

# list of variables
postfix = 2011
threshold = 1999
season_list = []
while postfix >= threshold:
    end = (postfix + 1) % 2000
    if (end >= 10):
        season = str(postfix) + str(end)
    else:
        season = str(postfix) + str(0) + str(end)
    season_list.append(season)
    postfix -= 1
print season_list

# main loop
for season in season_list:
    href = 'http://www.stat-football.com/en/a/eng.php?b=10&d='+season+'&c=51'
    print href
    xml = urlopen(href).read()
    xmlSoup = BeautifulStoneSoup(xml)
    tablet = xmlSoup.find(attrs={"class" : "bd5"})

    # Access DB
    con = sqlite3.connect('england.db')
    cur = con.cursor()

    # Parse site
    tour = tablet.findAll(attrs = { "class" : re.compile(r"^(s3|cc s3)$") })
    date = tablet.findAll(text = re.compile(r"(0[1-9]|[12][0-9]|3[01])\.(0[1-9]|1[012])\.(19|20)\d\d"))
    home = tablet.findAll(attrs = {"class" : "nw"})
    guest = tablet.findAll(attrs = {"class" : "s1"})
    score = tablet.findAll(attrs = {"class" : "nw pr15"})

    def parse_string(sequence):
        result = []
        for unit in sequence:
            text = ''.join(unit.findAll(text=True))
            result.append(text.strip())
        return result

    tour_list = parse_string(tour)
    home_list = parse_string(home)
    guest_list = parse_string(guest)
    score_list = parse_string(score)

    # Loop over found records to put them into sqlite3 DB
    i = len(tour_list)
    j = 0
    while (j < i):
        sql_add = 'INSERT INTO english_premier_league (season, tour, date, home, visitor, home_score, visitor_score) VALUES (?, ?, ?, ?, ?, ?, ?)'
        match = (season, int(tour_list[j]), date[j], home_list[j], guest_list[j], int(score_list[j][0:1]), int(score_list[j][2:3]))
        try:
            cur.executemany(sql_add, match)
        except sqlite3.DatabaseError as err:
            print "Error matching the record: ", err
        else:
            con.commit()
        part = float(j)/float(i)*100
        if (part%10 == 0):
            print (int(part)), "%"
        j += 1
    cur.close()
    con.close()
Also it may be useful to look at the end of strace output:
getcwd("/home/vitaly/football_forecast/epl", 512) = 35
stat("/home/vitaly/football_forecast/epl/england.db", {st_mode=S_IFREG|0644, st_size=24576, ...}) = 0
open("/home/vitaly/football_forecast/epl/england.db", O_RDWR|O_CREAT, 0644) = 3
fcntl(3, F_GETFD) = 0
fcntl(3, F_SETFD, FD_CLOEXEC) = 0
fstat(3, {st_mode=S_IFREG|0644, st_size=24576, ...}) = 0
lseek(3, 0, SEEK_SET) = 0
read(3, "SQLite format 3\0\4\0\1\1\0# \0\0\1~\0\0\0\30"..., 100) = 100
I'm running Python 2.7 on Ubuntu 12.04. Thanks a lot.
Replace cur.executemany(sql_add, match) with cur.execute(sql_add, match). executemany() is used for performing the same operation multiple times over an iterable of values. For example, if you had this:
match = [ (season1, tour1, date1, home1, visitor1, home_score1, visitor_score1),
          (season2, tour2, date2, home2, visitor2, home_score2, visitor_score2),
          (season3, tour3, date3, home3, visitor3, home_score3, visitor_score3) ]
cur.executemany(sql_add, match)
... it would be appropriate, since the cursor could iterate over the tuples in match and perform the insert operation on each of them.
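And if executemany() is what is really wanted, the rows for a season can be collected first and inserted in a single call after the while loop, which also cuts down the number of commits. A sketch using the same variables as the parser above:
# Sketch: collect every row for the season, then insert them in one executemany() call.
sql_add = ('INSERT INTO english_premier_league '
           '(season, tour, date, home, visitor, home_score, visitor_score) '
           'VALUES (?, ?, ?, ?, ?, ?, ?)')
rows = []
for j in range(len(tour_list)):
    rows.append((season, int(tour_list[j]), date[j], home_list[j],
                 guest_list[j], int(score_list[j][0:1]), int(score_list[j][2:3])))
cur.executemany(sql_add, rows)
con.commit()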
