The query below builds a result set in the variable result.
I need to insert that result into iconndest (the new MySQL server), but I have no idea how to insert the query result into the new table. I essentially want to do INSERT INTO DB.TBL SELECT * FROM result, but I am not sure how.
import mysql.connector
import pandas as pd
from sqlalchemy import create_engine
import multiprocessing as mp
from multiprocessing import cpu_count

try:
    engine_source = create_engine("CONN STRING")
    iconn = engine_source.connect()
    result = iconn.execute('SELECT QUERY')
    print('EXTRACT COMPLETE')

    engine_dest = create_engine("CONN STRING")
    iconndest = engine_dest.connect()
    iconndest.execute('SELECT * from ')

    engine_source.dispose()
    engine_dest.dispose()
except Exception as e:
    print('extract: ' + str(e))
What you describe is very simple if we use .mappings() to convert the list of Row objects to a list of RowMapping objects when we retrieve the results. RowMapping objects behave like dict objects when passed as parameter values:
import sqlalchemy as sa

source_engine = sa.create_engine("mssql+pyodbc://scott:tiger^5HHH@mssql_199")
destination_engine = sa.create_engine("sqlite://")

with source_engine.begin() as conn:
    results = (
        conn.exec_driver_sql(
            """\
            SELECT 1 AS id, N'foo' AS txt
            UNION ALL
            SELECT 2 AS id, N'bar' AS txt
            """
        )
        .mappings()
        .all()
    )
print(results)
# [{'id': 1, 'txt': 'foo'}, {'id': 2, 'txt': 'bar'}]

destination_engine.echo = True
with destination_engine.begin() as conn:
    conn.exec_driver_sql("CREATE TABLE t (id int, txt varchar(10))")
    conn.execute(
        sa.text("INSERT INTO t (id, txt) VALUES (:id, :txt)"), results
    )
"""SQL emitted:
INSERT INTO t (id, txt) VALUES (?, ?)
[generated in 0.00038s] ((1, 'foo'), (2, 'bar'))
"""
I have a small Python script that builds a DataFrame with one (or more) NaNs and then writes it to a Postgres database with the psycopg2 module, using the copy_from function. Here it is:
import logging
import traceback
from io import StringIO

import numpy as np
import pandas as pd
import psycopg2
from psycopg2 import sql

table_name = "test"
df = pd.DataFrame([[1.0, 2.0], [3.0, np.nan]],
                  columns=["VALUE0", "VALUE1"],
                  index=pd.date_range("2000-01-01", "2000-01-02"))

database = "xxxx"
user = "xxxxxxx"
password = "xxxxxx"
host = "127.0.0.1"
port = "xxxxx"

def nan_to_null(f,
                _NULL=psycopg2.extensions.AsIs('NULL'),
                _NaN=np.NaN,
                _Float=psycopg2.extensions.Float):
    if f != f:
        return _NULL
    else:
        return _Float(f)

psycopg2.extensions.register_adapter(float, nan_to_null)
psycopg2.extensions.register_adapter(np.float, nan_to_null)
psycopg2.extensions.register_adapter(np.float64, nan_to_null)

with psycopg2.connect(database=database,
                      user=user,
                      password=password,
                      host=host,
                      port=port) as conn:
    try:
        with conn.cursor() as cur:
            cmd = "CREATE TABLE {} (TIMESTAMP timestamp PRIMARY KEY NOT NULL, VALUE0 FLOAT, VALUE1 FLOAT)"
            cur.execute(sql.SQL(cmd).format(sql.Identifier(table_name)))
            buffer = StringIO()
            df.to_csv(buffer, index_label='TIMESTAMP', header=False)
            buffer.seek(0)
            cur.copy_from(buffer, table_name, sep=",")
            conn.commit()
    except Exception as e:
        conn.rollback()
        logging.error(traceback.format_exc())
        raise e
The problem is that psycopg2 fails to transform NaN into a Postgres NULL, although I have used this trick:
How do I convert numpy NaN objects to SQL nulls?
(the nan_to_null function).
I cannot make it work; it throws the following exception:
psycopg2.errors.InvalidTextRepresentation: invalid input syntax for type double precision: ""
CONTEXT: COPY test, line 2, column value1: ""
I am using Python 3.8 on Windows 10 with Anaconda 3, psycopg2 v2.8.5 and Postgres v12.3.
Thanks!
Here is the same code, updated with Adrian Klaver's solution.
The line that changed is:
df.to_csv(buffer, index_label='TIMESTAMP', header=False, na_rep='NaN')
We've added na_rep='NaN' to the to_csv call. There is no need to replace NaNs with an extra line of code; note that replacing them with 'NULL' does not work.
import psycopg2, logging, numpy as np, pandas as pd
from psycopg2 import sql
import traceback
from io import StringIO

if __name__ == '__main__':
    table_name = "test"
    df = pd.DataFrame([[1.0, 2.0], [3.0, np.nan]],
                      columns=["VALUE0", "VALUE1"],
                      index=pd.date_range("2000-01-01", "2000-01-02"))

    database = "xxxxxx"
    user = "xxxxx"
    password = "xxxxxx"
    host = "127.0.0.1"
    port = "xxxxxx"

    with psycopg2.connect(database=database,
                          user=user,
                          password=password,
                          host=host,
                          port=port) as conn:
        try:
            with conn.cursor() as cur:
                # Creating a new table test
                cmd = "CREATE TABLE {} (TIMESTAMP timestamp PRIMARY KEY NOT NULL, VALUE0 FLOAT, VALUE1 FLOAT);"
                cur.execute(sql.SQL(cmd).format(sql.Identifier(table_name)))

                # Writing content
                buffer = StringIO()
                df.to_csv(buffer, index_label='TIMESTAMP', header=False, na_rep='NaN')
                buffer.seek(0)
                cur.copy_from(buffer, table_name, sep=",")

                # Reading the table content
                cmd = "SELECT * FROM {};"
                cur.execute(sql.SQL(cmd).format(sql.Identifier(table_name)))
                test_data = pd.DataFrame(cur.fetchall())
                print(test_data)
                print(type(test_data.loc[1, 2]))

                # Deleting the test table
                cmd = "DROP TABLE {};"
                cur.execute(sql.SQL(cmd).format(sql.Identifier(table_name)))
                conn.commit()
        except Exception as e:
            conn.rollback()
            logging.error(traceback.format_exc())
            raise e
The prints show that NaN is correctly interpreted and stored in the DB.
The issue is the use of copy_from. From the docs:
Currently no adaptation is provided between Python and PostgreSQL types on COPY: ...
So your adapter does not come into play.
UPDATE: A possible solution:
Pandas Changing the format of NaN values when saving to CSV
See cs95's answer.
It seems you are inserting an empty string instead of a NULL value; you can easily reproduce your error with the following SQL code:
CREATE TABLE test(
x FLOAT
);
INSERT INTO test(x) VALUES ('');
-- ERROR: invalid input syntax for type double precision: "" Position: 29
On the other hand, NaN can be safely inserted into PostgreSQL:
INSERT INTO test(x) VALUES ('NaN');
Notice that PostgreSQL's float support differs slightly from the IEEE 754 standard, because PostgreSQL needs all values to be orderable in order to build indexes consistently. Therefore, in PostgreSQL, NaN is greater than or equal to any other number, including itself.
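To see this behaviour from Python, a quick check could look like the following (the connection details are placeholders):

import psycopg2

# Placeholder credentials -- adjust to your own database.
with psycopg2.connect(database="xxxx", user="xxxx", password="xxxx",
                      host="127.0.0.1", port="5432") as conn:
    with conn.cursor() as cur:
        # In PostgreSQL, NaN compares equal to itself and sorts above
        # every other float value, including infinity.
        cur.execute(
            "SELECT 'NaN'::float8 = 'NaN'::float8, "
            "'NaN'::float8 > 'Infinity'::float8"
        )
        print(cur.fetchone())  # (True, True)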
Thanks to Adrian Klaver's and jlandercy's answers, the solution is simple: replace np.nan by 'NaN' manually with the following line, which replaces the nan_to_null function:
df.replace(np.nan, "NaN", inplace=True)
And it works fine. Thank you guys!
Add na_rep='NaN' when you write your CSV file.
If you are using this in conjunction with psycopg2's copy_expert method, you may also need to add the null "NaN" option to your COPY statement so that the null representations match up.
Here's an example:
df.to_csv(csv_filename, index=False, na_rep='NaN')
string = sql.SQL("""
copy {}
from stdin (
format csv,
null "NaN",
delimiter ',',
header
)
""").format(sql.Identifier(table_name))
I'm a noob at Python, and I'm trying to do some operations on some tables.
At the moment I'm doing this by hard-coding the tables:
cur.execute("select id,email from table1;")
but I want to change the table to a variable so I can build a function, like:
cur.execute("select id,email from table;")
How can I do this?
Many thanks.
I'm using Python 3 with psycopg2.
#open connection with db
conn = psycopg2.connect(host="localhost",database="test", user="test", password="test")
cur = conn.cursor()
#Select id
cur.execute("select id,email from customers;")
#put ids on list
row = cur.fetchone()
Also I tried this:
sql = "select id,email from %s"
val = customers
cur.execute(sql, val)
And I have this error:
File "updateemails.py", line 14, in <module>
val = (customers)
NameError: name 'customers' is not defined
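One common way to do this safely is to compose the query with psycopg2's sql module, since table and column names cannot be passed as ordinary %s query parameters (those are only for values). A minimal sketch, assuming the table name comes from a trusted list rather than user input:

from psycopg2 import sql

def fetch_ids_and_emails(cur, table_name):
    # Identifiers (table/column names) must be composed with sql.Identifier;
    # the %s placeholder mechanism only works for values, not object names.
    query = sql.SQL("SELECT id, email FROM {}").format(sql.Identifier(table_name))
    cur.execute(query)
    return cur.fetchall()

rows = fetch_ids_and_emails(cur, "customers")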
I am saving sequences, each with a set of ids associated with it, as two columns in a sqlite3 DB, where the sequence is one column and the ids string is another column. I have a problem retrieving from the database.
The dictionary is created as unique_sequence = [list of ids].
Each sequence is a string of roughly 7000 characters or fewer, and the list of ids can be up to 1 million characters.
import sys
import time
import sqlite3
from Bio import SeqIO
from Bio.Seq import Seq

# sequences_dict_d maps each unique sequence to its list of ids (built elsewhere)
start_time = time.time()

conn = sqlite3.connect('Sausql_012419.db')
c = conn.cursor()
c.execute("create table Sau (sequence text, list_of_ids text)")
for record in sequences_dict_d:
    c.execute("insert into Sau values (?,?)",
              (record, '._/'.join(sequences_dict_d[record])))
conn.commit()

c.execute("SELECT COUNT(*) FROM Sau")
sql_count = c.fetchall()
print("saved sql database of {} proteins in --- {:.3f} seconds ---".format(
    sql_count[0][0], time.time() - start_time))
c.close()
conn.close()
# retrieval of exact sequence matches
for record in SeqIO.parse(queryfile, _format):
    conn = sqlite3.connect('Sausql_012419.db')
    c = conn.cursor()
    c.execute('select list_of_ids from Sau where sequence = str(record.seq)')
    print(c.fetchall())  # or do something with the list_of_ids
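As written, the retrieval query compares the sequence column to the literal text str(record.seq) instead of the record's actual sequence. A minimal sketch of a parameterised lookup against the same Sau table (opening the connection once, outside the loop):

# Bind the sequence as a parameter instead of embedding it in the SQL text.
conn = sqlite3.connect('Sausql_012419.db')
c = conn.cursor()
for record in SeqIO.parse(queryfile, _format):
    c.execute("SELECT list_of_ids FROM Sau WHERE sequence = ?", (str(record.seq),))
    row = c.fetchone()
    if row is not None:
        ids = row[0].split('._/')  # recover the original list of ids
        print(record.id, len(ids))
conn.close()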
I am new to using sqlite with Python. I am trying to insert two date values into two date columns in a sqlite DB via Python.
import sqlite3

def create_connection(db_file):
    # Create a database connection to a SQLite database
    # Param: db_file as str. Return: connection and cursor, or None
    try:
        conn = sqlite3.connect(db_file)
        cur = conn.cursor()
        return conn, cur
    except sqlite3.Error as e:
        print(e)
        return None

my_conn, my_cur = create_connection(dpd_sqlite_db_dir)

def create_sqlite_table_if_nonexist(conn, table_name):
    sql = 'create table if not exists ' + table_name + ' (data_download_date datetime, script_executed_date datetime)'
    conn.execute(sql)

create_sqlite_table_if_nonexist(my_conn, 'df_timestamp_en')

def insert_timestamp(conn, timestamp):
    # execute insert into db
    sql = ''' INSERT INTO df_timestamp_en (data_download_date, script_executed_date) VALUES(?,?) '''
    cur = conn.cursor()
    cur.execute(sql, timestamp)
    return cur

timestamp_info = ('2018-10-30', '2018-11-30')
insert_timestamp(my_conn, timestamp_info)
It runs and creates the table with two date columns, but it doesn't insert timestamp_info's values.
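One likely explanation is that the insert is never committed: sqlite3 opens a transaction implicitly, so the change is discarded when the connection goes away. A minimal sketch of the same insert with an explicit commit, reusing the create_connection helper above:

def insert_timestamp(conn, timestamp):
    # Execute the insert and commit so the row is actually persisted.
    sql = 'INSERT INTO df_timestamp_en (data_download_date, script_executed_date) VALUES (?, ?)'
    cur = conn.cursor()
    cur.execute(sql, timestamp)
    conn.commit()
    return cur

insert_timestamp(my_conn, ('2018-10-30', '2018-11-30'))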