Autocommit is causing pandas to_sql to fail - python-3.x

I've got a problem with my engine parameters:
# Reproduction script from the question: pandas.DataFrame.to_sql against
# SQL Server through a pyodbc/SQLAlchemy engine with autocommit enabled.
import pyodbc
import pandas as pd
from sqlalchemy import create_engine
import urllib

# Raw ODBC connection string (Windows/trusted authentication).
conn_str = (
    r'Driver=ODBC Driver 11 for SQL Server;'
    r'Server=Saturn;'
    r'Database=ExperienceRating2019;'
    r'Trusted_Connection=yes;'
)
# odbc_connect expects the connection string URL-encoded.
quoted_conn_str = urllib.parse.quote_plus(conn_str)
# NOTE(review): execution_options(autocommit=True) is what triggers the
# HY010 "Function sequence error" during to_sql reported below.
engine = create_engine('mssql+pyodbc:///?odbc_connect={}'.format(quoted_conn_str)).execution_options(autocommit=True)
cnxn = engine.connect()
splitpoint = 17000  # unused in this snippet
excel_file = "#2 DRATIO RUN.xlsx"
d_ratio_sheet = "D RATIO & ELR"
# Load the worksheet and write it to SQL Server, replacing any existing table.
d_ratio = pd.read_excel(open(excel_file,'rb'),sheet_name = d_ratio_sheet)
d_ratio.to_sql("d_ratio", cnxn, if_exists = 'replace')
I will get the following error:
DBAPIError: (pyodbc.Error) ('HY010', '[HY010] [Microsoft][ODBC Driver 11 for SQL Server]Function sequence error (0) (SQLFetch)') (Background on this error at: http://sqlalche.me/e/dbapi)
If I change my engine to lose the autocommit
engine = create_engine('mssql+pyodbc:///?odbc_connect={}'.format(quoted_conn_str))
The error goes away (yay!), but later in my code, where I execute a stored procedure, it no longer commits:
engine.execute("sp_refresh_inputs")
**Question:** How can I change my connection so that both pandas and SQLAlchemy work?

I ended up using two engines, one for pandas and one for sqlalchemy:
# Workaround: two engines over the same DSN — a plain one for pandas.to_sql
# and an autocommit one for executing stored procedures via SQLAlchemy.
cnxn = create_engine('mssql+pyodbc:///?odbc_connect={}'.format(quoted_conn_str))
engine = create_engine('mssql+pyodbc:///?odbc_connect={}'.format(quoted_conn_str)).execution_options(autocommit=True)

Related

How to load data from a connection string with vaex package?

If I have a table on my server and I am producing a connection string to it, how can I, using Vaex, load it to a dataframe?
Here is what I am doing but with Pandas:
from sqlalchemy import types, create_engine, text
import pandas as pd
import pymysql
def connect_to_data(driver='mysql+pymysql://', conn_string=''):
    """Create a SQLAlchemy engine for ``driver + conn_string``.

    Returns the engine on success, or None when engine creation fails
    (the error is printed rather than raised).

    Bug fix: the original never bound ``conn`` when ``create_engine``
    raised, so ``return conn`` crashed with UnboundLocalError instead of
    reporting the failure.
    """
    conn = None  # ensure `conn` exists even when create_engine fails
    try:
        conn = create_engine(driver + conn_string)
        print("MySQL Connection Successfull!")
    except Exception as err:
        print("MySQL Connection Failed!")
        print(err)
    return conn
# Connect to the db:
conn_string = 'xxxxxxxx'  # placeholder; the real DSN is omitted
conn = connect_to_data(conn_string=conn_string)
# Get all requests from the db:
query = '''SELECT * FROM table_name'''
result = conn.execute(text(query))
# Desired dataframe:
df = pd.read_sql_query(query, conn)
How can I do the same with Vaex (because of its high performance)?
For now at least, you can't do it directly. But vaex can easily read a pandas dataframe so you can
# Following your example..
# Vaex has no direct SQL reader; load through pandas, then wrap the frame.
pandas_df = pd.read_sql_query(query, conn)
df = vaex.from_pandas(pandas_df)

How to Encode ( utf-8) in Pandas (Excel as source)

I am trying to read from excel and load into Mongodb using Pymongo.
The error I got is "cannot encode object: &lt;NA&gt;, of type: &lt;class 'pandas._libs.missing.NAType'&gt;". When I researched it, I was told to use the utf-8-sig format to insert it into MongoDB, but in a pandas dataframe there is no option to use utf-8.
from pymongo import MongoClient
from datetime import datetime
import pandas as pd
import Parameters
import pandasql as pf
import json
import pymongo
import xlrd
from pathlib import Path
import os
import constants
# Question's reproduction: load an Excel sheet with pandas and insert the
# rows into MongoDB. The failure comes from df1.astype('string'), which
# turns blank cells into pandas.NA (NAType) — a value pymongo cannot encode.
try:
    class conn:
        def __init__(self):
            # NOTE(review): the URI is assembled by concatenating constants —
            # verify the pieces join into a valid mongodb:// connection string.
            client = pymongo.MongoClient( "mongodb://" + constants.USER_NAME + ":" + constants.PWD + constants.server + constants.CA_CERTIFICATES_PATH)
            db = client[Parameters.STG_QC_Hub_Files]
            week="08-02-2021"  # hard-coded reporting week
            out_col = db[Parameters.col]
            filename = "1.xlsx"
            path1 = Path('//test3'+'/'+filename)  # UNC-style share path
            data_load_date = datetime.today().strftime('%m-%d-%Y')
            df1=pd.read_excel(path1,sheet_name="AU-ARCM Details",keep_default_na=False)
            # df1 = pd.read_excel(xls+filename,keep_default_na=False,encoding='utf-8-sig')
            # df1 = pd.read_csv(xls,keep_default_na=False,encoding='utf-8-sig').iloc[:, : 86]
            df1["Week"]=week
            df1["Data Load Date"]=data_load_date
            # astype('string') converts empty cells to pandas.NA — the value
            # pymongo later rejects ("cannot encode object: <NA>").
            df1 = df1.astype('string')
            # df1.index = df1.index.str.encode('utf-8')
            df1=df1.drop(['Source.Name'], axis=1)
            # `records` is only used for the count printed below; the insert
            # itself uses df1.to_dict('records').
            records = json.loads(df1.T.to_json()).values()
            out_col.insert_many(df1.to_dict('records'))
            print("Imported File " +str(filename)+" with " +str(len(records) )+ " records")
    c = conn()
except Exception as e:
    # Broad catch: any failure above is only printed, never re-raised.
    print(e)
Traceback:
File "C:\Users\PycharmProjects\ReMs\venv\lib\site-packages\pymongo\message.py", line 1323, in _do_batched_op_msg
operation, command, docs, check_keys, ack, opts, ctx)
bson.errors.InvalidDocument: cannot encode object: <NA>, of type: <class 'pandas._libs.missing.NAType'>
You have some blank cells in your spreadsheet that pandas represents with its own type (NAType); pymongo doesn't know what to do with this type, hence the error. You will need to remove any of these in order to load the values into mongodb using the method you are using.
Consider something like this just before you attempt the insert:
import numpy as np
# Replace NaN/NA holes with None so pymongo can encode the documents.
df1 = df1.replace(np.nan, None)

Python pandas into azure SQL, bulk insert

How can I arrange bulk insert of python dataframe into corresponding azure SQL.
I see that INSERT works with individual records :
INSERT INTO XX ([Field1]) VALUES (value1);
How can I insert the entire content of dataframe into Azure table?
Thanks
According to my test, we can also use to_sql to insert data into Azure SQL.
for example
from urllib.parse import quote_plus
import numpy as np
import pandas as pd
from sqlalchemy import create_engine, event
import pyodbc

# Azure SQL ODBC connection string (placeholders in <>).
conn ='Driver={ODBC Driver 17 for SQL Server};Server=tcp:<server name>.database.windows.net,1433;Database=<db name>;Uid=<user name>;Pwd=<password>;Encrypt=yes;TrustServerCertificate=no;Connection Timeout=30;'
quoted = quote_plus(conn)
engine=create_engine('mssql+pyodbc:///?odbc_connect={}'.format(quoted))

# Bug fix: this must be a decorator (`@event.listens_for`); the original
# text rendered the `@` as `#`, turning it into a comment, so
# fast_executemany was never actually enabled.
@event.listens_for(engine, 'before_cursor_execute')
def receive_before_cursor_execute(conn, cursor, statement, params, context, executemany):
    """Enable pyodbc fast_executemany for bulk INSERTs issued by to_sql."""
    print("FUNC call")
    if executemany:
        cursor.fast_executemany = True

# insert
table_name = 'Sales'
# For test, I use a csv file to create dataframe
df = pd.read_csv(r'D:\data.csv')  # raw string: avoids the '\d' escape warning
df.to_sql(table_name, engine, index=False, if_exists='replace', schema='dbo')

# test after inserting: read the table back to verify.
query = 'SELECT * FROM {table}'.format(table=table_name )
dfsql = pd.read_sql(query, engine)
print(dfsql)

Getting Error while trying to retrieve text for error ORA-01804 while executing aws python lambda linux

I am trying to execute below lambda function from aws lambda, I used python 3.7 as runtime environment.
import cx_Oracle
import os
import logging
import boto3
from botocore.exceptions import ClientError
from base64 import b64decode
# Module-level logger for the Lambda; INFO level so handler progress
# shows up in CloudWatch logs.
logger = logging.getLogger()
logger.setLevel(logging.INFO)
def lambda_handler(event, context):
    """AWS Lambda entry point: run a smoke-test query against Oracle.

    Connects with cx_Oracle, executes SELECT COUNT(*) FROM DUAL, logs the
    result rows, and returns a success message.

    Fix: the cursor and connection are now closed in finally blocks, so
    they are released even when the query raises (the original leaked
    both on any exception).
    """
    logger.info('begin lambda_handler')
    # The Oracle client libraries are bundled with the deployment package,
    # so point the dynamic loader at the working directory.
    os.environ['LD_LIBRARY_PATH'] = os.getcwd()
    dsn = cx_Oracle.makedsn("hostname", 1521, service_name="servicename")
    con = cx_Oracle.connect("userid", "passwod", dsn)
    try:
        cur = con.cursor()
        try:
            sql = """SELECT COUNT(*) AS TEST_COUNT FROM DUAL"""
            cur.execute(sql)
            # Build list-of-dicts rows keyed by column name.
            columns = [i[0] for i in cur.description]
            rows = [dict(zip(columns, row)) for row in cur]
            logger.info(rows)
        finally:
            cur.close()
    finally:
        con.close()
    logger.info('end lambda_handler')
    return "Successfully connected to oracle."
But when i execute above lambda i get below error.
Error while trying to retrieve text for error ORA-01804
Any help on this?
Check if your Oracle instant version is the same as your database. That can also lead to this error.
I tried using the latest oracle instant client v21.1 and it spews the same error like this.
It turns out the server that hosts the database is using v11.2 so I had to download the v11.2 to match it.

AWS RDS oracle python connection

I have launched an RDS Oracle database instance and want to connect to it using Python code. I tried using cx_Oracle, but it did not work out.
Any suggestions/ help would be great !
Thanks in Advance
import cx_Oracle

# Easy Connect credentials take the form user/password@host:port/service.
# Fix: the separator before the host must be '@' — the original text shows
# '#', which is not a valid Easy Connect string.
connstr = 'username/password@testinstance.cycxmhpviuwu.eu-west-1.rds.amazonaws.com:1521/orcl'
conn = cx_Oracle.connect(connstr)
Error message I am getting is:
cx_Oracle.DatabaseError: DPI-1047: 32-bit Oracle Client library cannot be loaded: "The specified module could not be found"
You need to either (a) install the 32-bit Oracle Client libraries or (b) ensure that you are using 64-bit Python and 64-bit cx_Oracle. See the installation instructions for more information.
Please check , 32 bit/ 64 bit library , and Installation Guide
Moreover, you can refer to the below code snippet
from __future__ import print_function
import cx_Oracle
import boto3
import base64
import requests
import json
import configparser
def connect_oracle(oracle_arn,oracle_host,oracle_port,oracle_db):
    """Open a cx_Oracle connection using credentials held in Secrets Manager.

    The secret identified by *oracle_arn* is expected to contain a JSON
    body with 'username' and 'password' keys; *oracle_host*, *oracle_port*
    and *oracle_db* name the target instance.
    """
    # Fetch the credential secret from us-west-2.
    secrets_client = boto3.session.Session().client('secretsmanager','us-west-2')
    secret_payload = secrets_client.get_secret_value(SecretId=oracle_arn)
    creds = json.loads(secret_payload['SecretString'])
    # Build the TNS descriptor and connect with the retrieved credentials.
    tns = cx_Oracle.makedsn(oracle_host, oracle_port, oracle_db)
    return cx_Oracle.connect(creds['username'], creds['password'], tns)
def test_oracle_connect():
    """Smoke-test connect_oracle: query dba_tables and expect one result row."""
    # change the variable value as required
    oracle_arn = 'oracle_arn'
    oracle_host = 'oracle_host'
    oracle_port = 'oracle_port'
    oracle_db = 'oracle_db'
    run_rds_test_scripts = 'true'
    # Guard clause: bail out early when the test toggle is off.
    if run_rds_test_scripts != 'true':
        print('no run')
        return
    conn = connect_oracle(oracle_arn, oracle_host, oracle_port, oracle_db)
    cur = conn.cursor()
    executed = cur.execute('select count(*) from dba_tables')
    first_batch = cur.fetchmany(numRows=1)
    # COUNT(*) always yields exactly one row.
    assert len(first_batch) == 1
    cur.close()
    conn.close()
test_oracle_connect()

Resources