I am trying to create a function that accepts a DataFrame and writes it row by row into a SQL Server table. I am stuck on what needs to go in the cursor.execute call after the INSERT query string below.
df - the DataFrame to insert
desttable - the destination table the rows should be inserted into
tablecols - a list of the destination table's column names
# Insert DataFrame to Table
def InsertintoDb(self, df, desttable, tablecols):
    tablecolnames = ','.join(tablecols)
    qmark = ['?' for s in tablecols]
    allqmarks = ','.join(qmark)
    # rowappendcolname = ','.join(['row.' + s for s in tablecols])
    for index, row in df.iterrows():
        cursor.execute(
            '''INSERT INTO [Py_Test].[dbo].''' + desttable + ''' (''' + tablecolnames + ''') VALUES (''' + allqmarks + ''')''',
        )
    self.conn.commit()
Any help is much appreciated.
As suggested by the commenter, I was able to do it using df.to_sql. Here is the working code:
import sqlalchemy
from urllib.parse import quote_plus as urlquote


class DbOps:
    def __init__(self):
        self.username = ''
        self.password = ''
        self.ipaddress = 'localhost'
        # self.port = 5439
        self.dbname = ''
        # Connection string with the necessary SQL Server login information
        self.engine = sqlalchemy.create_engine(
            f"mssql+pyodbc://{self.username}:{urlquote(self.password)}@{self.ipaddress}/{self.dbname}"
            "?driver=SQL+Server+Native+Client+11.0"
        )

    def InsertintoDb(self, df, desttable, tablecols):
        df.to_sql(desttable, self.engine, index=False, if_exists='append')
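For completeness, a minimal usage sketch (the DataFrame contents and the dest_table name are made up for illustration; to_sql matches DataFrame columns to table columns by name, so tablecols is not actually used by the method):

import pandas as pd

# Hypothetical example data; column names are placeholders.
df = pd.DataFrame({'Name': ['Alice', 'Bob'], 'Age': [30, 25]})

db = DbOps()
db.InsertintoDb(df, 'dest_table', ['Name', 'Age'])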
Python 3.8.10
streamlit==1.9.0
pandas==1.4.2
psycopg2-binary==2.9.3
I am loading a Postgres table directly into a pandas DataFrame with the following code.
df = pd.DataFrame(run_query("SELECT * FROM schema.tablename;"))
Displaying it with either streamlit.dataframe(df) or streamlit.write(df) loses the column names.
In order to capture the column names, I use this kluge.
# Initialize connection.
@st.experimental_singleton
def init_connection():
    return psycopg2.connect(**st.secrets["postgresservername"])

conn = init_connection()

# Perform query.
@st.experimental_memo(ttl=600)
def run_query(query):
    with conn.cursor() as cur:
        cur.execute(query)
        return cur.fetchall()
def load_table_as_dataframe(table):
    # This is super klugy.
    data = run_query("SELECT * FROM schema.{};".format(str(table)))
    columns = run_query("SELECT * FROM information_schema.columns WHERE table_schema = 'schema' AND table_name = '{}';".format(str(table)))
    # Fish out the actual column names
    columns = [c[3] for c in columns]
    df = pd.DataFrame(data, columns=columns)
    return df
df = load_table_as_dataframe("tablename")
Which works...
Is there a better way to collect the needed data (and columns names) into a Pandas DataFrame within Postgres and Streamlit?
Using...
df = pd.read_sql("SELECT * FROM schema.{};".format(str(table)), conn)
...solved the issue. (Thx @parfait)
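For reference, a minimal sketch of the loader rewritten around pd.read_sql (it reuses the conn from init_connection() above; the schema and table names are the same placeholders as before):

import pandas as pd


def load_table_as_dataframe(table):
    # pd.read_sql returns the rows and the column names in one call,
    # so no separate information_schema query is needed.
    return pd.read_sql("SELECT * FROM schema.{};".format(str(table)), conn)


df = load_table_as_dataframe("tablename")
st.dataframe(df)  # column names are preserved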
I'm pretty new to Python and programming in general, so I'd like some help with the following problem.
My table is set up like this:
https://i.imgur.com/KFPq2DI.png
Now I am trying to count all '✓' marks in each row and write that count into the Teilnahme column.
def teilnahmencounter(ctx):
    # Connect
    connection = sqlite3.connect("kekse.db")
    # Cursor
    cursor = connection.cursor()
    # Query
    sql = "SELECT * FROM kekse ORDER BY ID"
    cursor.execute(sql)
    # Fetch all rows first so the cursor can be reused for the updates
    for dsatz in cursor.fetchall():
        i = 0
        for x in range(2, 19):
            if str(dsatz[x]) == '✓':
                i += 1
        cursor.execute('UPDATE kekse SET Teilnahme=' + str(i) + ' WHERE ID=?', (dsatz[0],))
        connection.commit()
        # print(dsatz[1], i, "Teilnahmen")
    connection.close()
Try using cast in your update where the value is being set:
import sqlite3

# Connect
con = sqlite3.connect("dbname.db")
# Cursor
cur = con.cursor()

for row in cur.execute("select * from testtab"):
    print(row)

cur.execute("update testtab set id=21 where id = cast(" + str(2) + " as int)")
con.commit()
con.close()
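Applied back to the checkmark counter above, the update might look something like this (a sketch, untested against the original kekse.db; note that sqlite3 also needs the parameters passed as a tuple):

# i holds the number of '✓' entries counted for the current row (dsatz)
cursor.execute(
    "UPDATE kekse SET Teilnahme=? WHERE ID = cast(? as int)",
    (i, dsatz[0]),
)
connection.commit()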
In the code below I save value1 and value2 to the sqlite3 database, and the contents of txt_ to a file in a folder named data.
What I am trying to achieve is that when I rerun the programme and open the file, the txt_ file should open in the text area with the lines I added when I saved it. And when I click the Add button, value1 and value2 should be updated and the newly created line should appear on the next line.
Let me know if my method is correct; if not, please tell me a better one.
CODE:
from tkinter import *
from tkinter import messagebox
import sqlite3
import os

root = Tk()
root.geometry('400x400')

var_e = StringVar(None)


def create_my_db():
    conn = sqlite3.connect(database=r'my db.db')
    cur = conn.cursor()
    cur.execute("""CREATE TABLE IF NOT EXISTS "myLogs"
                   (
                   "int_value"   INTEGER,
                   "float_value" REAL
                   )
                """)
    conn.commit()

create_my_db()


def add_lbl():
    global value1, value2
    value1 += 1
    value2 += 1
    sample = f'This is line {value1} which has value of {value2}\n'
    txt_.insert(END, sample)


def save():
    conn = sqlite3.connect(database=r'my db.db')
    cur = conn.cursor()
    cur.execute("""INSERT INTO myLogs VALUES (?,?)""", (value1, value2))
    conn.commit()
    # labels to check if the values are stored in the database
    values_lbl.config(text=f'value 1 is [ {value1} ] & value 2 is [ {value2} ]')


def save_txt():
    file_txt = open(f'data/{value1}.txt', 'w')
    file_txt.write(txt_.get(1.0, END))
    file_txt.close()
    messagebox.showinfo('SAVED', 'Data saved to the database.')


def open_():
    for txt_file in os.listdir("data/"):
        if txt_file.split('.')[0] == f'{var_e.get()}':
            file_ = open(f"data/{txt_file}", "r")
            for i in file_:
                txt_.insert(END, i)
            file_.close()


value1 = 0
value2 = 0.9

values_lbl = Label(root, text=f'value 1 is [ {value1} ] & value 2 is [ {value2} ]')
values_lbl.pack()

btn_frame = Frame(root)
btn_frame.pack()

btn_add = Button(btn_frame, text='Add', command=add_lbl)
btn_add.pack(side=LEFT)

e = Entry(btn_frame, textvariable=var_e)
e.pack(side=LEFT)

btn_open = Button(btn_frame, text='Open', command=open_)
btn_save = Button(btn_frame, text='Save', command=lambda: [save(), save_txt()])
btn_open.pack(side=LEFT)
btn_save.pack(side=LEFT)

txt_ = Text(root)
txt_.pack(fill=BOTH, expand=True)

root.mainloop()
When I posted this question I didn't know how to run the query that updates value1 and value2, which is why I didn't include it in the open_() function. I have since worked out how that query should be done, so in the code below I have added it to open_(). Now the complete programme runs fine.
def open_():
    global value1, value2
    txt_.delete(1.0, END)
    for txt_file in os.listdir("data/"):
        if txt_file.split('.')[0] == f'{var_e.get()}':
            file_ = open(f"data/{txt_file}", "r")
            for i in file_:
                txt_.insert(END, i)
            file_.close()

    conn = sqlite3.connect(database=r'my db.db')
    cur = conn.cursor()
    cur.execute("""SELECT * FROM myLogs WHERE int_value=?""", (var_e.get(),))
    row = cur.fetchone()
    if row is None:
        messagebox.showerror("ERROR", 'Invalid input.')
    else:
        value1 = row[0]
        value2 = row[1]
    conn.commit()
I have a direct CSV stream from an API that I am using to push data into a database with the following code:
def loadData(data, engine):
    stream = data.content
    try:
        df = pd.read_csv(io.StringIO(stream.decode('utf-8')))
        df['Snapshot'] = datetime.datetime.now()
        if file.split(".")[0] == "SIM_SDS":
            df.to_sql(file.split(".")[0], engine, if_exists='append', index=False, infer_datetime_format=True)
        else:
            df.to_sql(file.split(".")[0], engine, if_exists='replace', index=False, infer_datetime_format=True)
    except:
        print('Loading data to the database failed for ' + file)
The problem I am having is that the incoming datetime format is YYYY-MM-DDTHH:MM:SS.SSSZ (e.g. 2017-10-30T20:26:22.731Z).
I have found a lot of examples handling just a T, or a T with a +0300 offset, but I have yet to find this particular format, and I can't seem to get these values to parse as datetimes. The other piece of this challenge is that the datetime fields aren't the same in every case, so I also need a way of recognizing which columns hold this format.
The problem this creates shows up when the table is created in the database, since these fields come through as text; that leads to issues when they are used in Tableau for DATEDIFF-type work.
Try 1:
dateCol = [col for col in df.columns if 'Date' in col]
for col in dateCol:
    df[col] = df[col].map(lambda x: x.strip('T').strip('Z'))
    print(df[col])
This failed, I think because of NaN values in rows where the field has no date.
Got it to work with the following:
def loadData(data, engine):
    stream = data.content
    try:
        df = pd.read_csv(io.StringIO(stream.decode('utf-8')))
        df['Snapshot'] = datetime.datetime.now()
        df = df.where(df.notnull(), None)
        dateCol = [col for col in df.columns if 'Date' in col]
        for col in dateCol:
            df[col] = pd.to_datetime(df[col])
        if file.split(".")[0] == "SIM_SDS":
            df.to_sql(file.split(".")[0], engine, if_exists='append', index=False)
        else:
            df.to_sql(file.split(".")[0], engine, if_exists='replace', index=False)
    except:
        print('Loading data to the database failed for ' + file)
This results in the table fields I know for sure should be datetimes actually being created as datetimes.
Feel free to give me suggestions to improve though.
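One possible refinement, offered as a sketch rather than a tested drop-in: errors='coerce' turns unparseable values into NaT instead of raising, and catching a concrete exception makes failures easier to diagnose. The column-detection rule ('Date' in the name) is kept from the original, and file is passed in explicitly here, whereas the original read it from an enclosing scope.

import datetime
import io

import pandas as pd


def loadData(data, engine, file):
    stream = data.content
    try:
        df = pd.read_csv(io.StringIO(stream.decode('utf-8')))
        df['Snapshot'] = datetime.datetime.now()
        # Parse ISO 8601 strings such as 2017-10-30T20:26:22.731Z; anything unparseable becomes NaT
        for col in (c for c in df.columns if 'Date' in c):
            df[col] = pd.to_datetime(df[col], errors='coerce')
        table = file.split(".")[0]
        df.to_sql(table, engine,
                  if_exists='append' if table == "SIM_SDS" else 'replace',
                  index=False)
    except Exception as exc:
        print(f'Loading data to the database failed for {file}: {exc}')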
A database contains 80 tables, and every table is an 18000x4 matrix. How can I convert these into an 80x18000x4 NumPy array?
The db: https://drive.google.com/open?id=0B3bNSNFik5wGdm1DYnJwNHBJMVU
I wrote a function. Do you have any better idea?
import os
import sqlite3 as sql

import numpy as np
import pandas as pd


def db_to_array(db, r, c):
    db = sql.connect(db)
    cursor = db.cursor()
    cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
    tables = cursor.fetchall()

    wd = os.getcwd()
    if not os.path.exists(wd + '/temp/'):
        os.makedirs(wd + '/temp/')

    # Dump every table to a CSV file first
    for table_name in tables:
        table_name = table_name[0]
        table = pd.read_sql_query("SELECT * from %s" % table_name, db)
        table.to_csv(wd + '/temp/' + table_name + '.csv', index_label='index')

    ss = os.listdir(wd + '/temp/')
    print(len(ss))
    dd = np.empty([len(ss), r, c])
    for i in range(len(ss)):
        # index_col drops the 'index' column written by to_csv, leaving the c data columns
        ddd = np.array(pd.read_csv(wd + '/temp/' + ss[i], index_col='index'))
        print(i)
        print(ddd.shape)
        dd[i, :, :] = ddd[:, 0:c]
    return dd
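A possibly simpler route, sketched under the assumption that every table really has r rows and c numeric columns, is to skip the CSV round trip and stack the query results directly:

import sqlite3 as sql

import numpy as np
import pandas as pd


def db_to_array_direct(db_path, r, c):
    con = sql.connect(db_path)
    cursor = con.cursor()
    cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
    tables = [name for (name,) in cursor.fetchall()]

    arrays = []
    for table_name in tables:
        df = pd.read_sql_query("SELECT * FROM %s" % table_name, con)
        # Keep the first r rows and c columns of each table
        arrays.append(np.asarray(df)[:r, :c])
    con.close()

    # Shape: (number of tables, r, c)
    return np.stack(arrays)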