retrieve the columns of SAP HANA tables - python-3.x

I want to get the columns of multiple tables in a SAP HANA database. I am using hdbcli and it raises this error:
hdbcli.dbapi.Error: (362, 'invalid schema name: INFORMATION_SCHEMA: line 1 col 15
Python code:
import hdbcli
from hdbcli import dbapi
import pandas as pd
from google.cloud import bigquery
conn = dbapi.connect(
    address="example.hana.trial-us10.hanacloud.ondemand.com",
    port=443,
    user='DBADMIN',
    password='example#xxxxxx'
)
tables = ['table1', 'table2', 'table3']
for table in tables:
cursor = conn.cursor()
cursor.execute(f"SELECT * FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME='{table}'")
print(f"Table '{table}' description:")
print([column[3] for column in cursor.fetchall()])
cursor.close()
conn.close()
I need some help to proceed. Thanks.
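For reference, SAP HANA has no INFORMATION_SCHEMA; column metadata is exposed through system views such as SYS.TABLE_COLUMNS, which is why the statement fails at the schema name. Below is a minimal sketch of the same loop against that view; it assumes the tables were created with unquoted names, which HANA stores upper-case:
from hdbcli import dbapi

conn = dbapi.connect(
    address="example.hana.trial-us10.hanacloud.ondemand.com",
    port=443,
    user='DBADMIN',
    password='example#xxxxxx'
)
tables = ['table1', 'table2', 'table3']
for table in tables:
    cursor = conn.cursor()
    # SYS.TABLE_COLUMNS holds one row per column; POSITION keeps the original order
    cursor.execute(
        "SELECT COLUMN_NAME FROM SYS.TABLE_COLUMNS WHERE TABLE_NAME = ? ORDER BY POSITION",
        (table.upper(),)
    )
    print(f"Table '{table}' columns:")
    print([row[0] for row in cursor.fetchall()])
    cursor.close()
conn.close()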

Related

Size in spark dataframe

I created a dataframe from a table in my Postgres database. When I run df.count() to get the number of rows, I get this warning:
WARN TaskSetManager: Stage 9 contains a task of very large size (22439 KiB). The maximum recommended task size is 1000 KiB.
What does that mean? What is the maximum size of a dataframe in Spark?
Here's how I connected to the Postgres database:
import configparser
import psycopg2
import pandas as pd
from queries import COUNTRY_TABLE,ACTORS_TABLE,COL_ACTOR, COL_COUNTRY
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, udf
spark = SparkSession.builder.appName('ETL dvdrental pysaprk').getOrCreate()
def connection_db():
    conn = psycopg2.connect("host=localhost dbname=demo user=postgres password=admin port=5432")
    cur = conn.cursor()
    return [cur, conn]

def extract_data(query):
    conn_param = connection_db()
    cur = conn_param[0]
    conn = conn_param[1]
    try:
        cur.execute(query)
        data = cur.fetchall()
        return data
    except Exception as e:
        print(e)
tickets_col = ["ticket_no","book_ref", "passenger_id", "passenger_name","contact_data"]
tickets = spark.createDataFrame(extract_data("SELECT * FROM tickets")).toDF(*tickets_col)
tickets.count()
I get this warning when I execute tickets.count().
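The warning usually means the rows were materialised on the driver (here via cur.fetchall()) and then serialised into the tasks that createDataFrame ships to the executors, so each task carries the whole dataset. One way around this is to let Spark read the table itself over JDBC, so the executors pull the data directly; a sketch, assuming the same local Postgres credentials and that the PostgreSQL JDBC driver is available (the pinned version below is only illustrative):
from pyspark.sql import SparkSession

spark = (
    SparkSession.builder
    .appName('ETL dvdrental pyspark')
    .config('spark.jars.packages', 'org.postgresql:postgresql:42.7.3')
    .getOrCreate()
)

# Spark reads the table in parallel on the executors instead of through the driver
tickets = (
    spark.read.format('jdbc')
    .option('url', 'jdbc:postgresql://localhost:5432/demo')
    .option('dbtable', 'tickets')
    .option('user', 'postgres')
    .option('password', 'admin')
    .load()
)
tickets.count()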

How to load data from a connection string with vaex package?

If I have a table on my server and I produce a connection string to it, how can I load it into a dataframe with Vaex?
Here is what I am doing with pandas:
from sqlalchemy import types, create_engine, text
import pandas as pd
import pymysql
def connect_to_data(driver='mysql+pymysql://', conn_string=''):
    try:
        conn = create_engine(driver + conn_string)
        print("MySQL Connection Successful!")
    except Exception as err:
        print("MySQL Connection Failed!")
        print(err)
    return conn
# Connect to the db:
conn_string = 'xxxxxxxx'
conn = connect_to_data(conn_string=conn_string)
# Get all requests from the db:
query = '''SELECT * FROM table_name'''
result = conn.execute(text(query))
# Desired dataframe:
df = pd.read_sql_query(query, conn)
How can I do the same with Vaex (because of its high performance)?
For now at least, you can't do it directly, but Vaex can easily read a pandas dataframe, so you can:
# Following your example..
import vaex

pandas_df = pd.read_sql_query(query, conn)
df = vaex.from_pandas(pandas_df)
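If the table is read more than once, a common Vaex pattern is to convert it a single time and memory-map the result afterwards; a small sketch, reusing pandas_df from above (the file name is just a placeholder):
import vaex

df = vaex.from_pandas(pandas_df)
df.export_hdf5('table_name.hdf5')   # one-off conversion to HDF5

df = vaex.open('table_name.hdf5')   # later runs: lazy, memory-mapped load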

Python pandas into azure SQL, bulk insert

How can I arrange a bulk insert of a Python dataframe into the corresponding Azure SQL table?
I see that INSERT works with individual records:
INSERT INTO XX ([Field1]) VALUES (value1);
How can I insert the entire content of the dataframe into the Azure table?
Thanks
According to my test, we can also use to_sql to insert data into Azure SQL.
For example:
from urllib.parse import quote_plus
import numpy as np
import pandas as pd
from sqlalchemy import create_engine, event
import pyodbc
# Azure SQL connection string
conn = 'Driver={ODBC Driver 17 for SQL Server};Server=tcp:<server name>.database.windows.net,1433;Database=<db name>;Uid=<user name>;Pwd=<password>;Encrypt=yes;TrustServerCertificate=no;Connection Timeout=30;'
quoted = quote_plus(conn)
engine = create_engine('mssql+pyodbc:///?odbc_connect={}'.format(quoted))

# enable fast_executemany for every bulk execute on this engine
@event.listens_for(engine, 'before_cursor_execute')
def receive_before_cursor_execute(conn, cursor, statement, params, context, executemany):
    print("FUNC call")
    if executemany:
        cursor.fast_executemany = True

# insert
table_name = 'Sales'
# For the test, I use a csv file to create the dataframe
df = pd.read_csv(r'D:\data.csv')
df.to_sql(table_name, engine, index=False, if_exists='replace', schema='dbo')
#test after inserting
query = 'SELECT * FROM {table}'.format(table=table_name )
dfsql = pd.read_sql(query, engine)
print(dfsql)
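For very large dataframes, the same call can also write in batches so neither pandas nor the driver has to hold one huge parameter set; a sketch reusing the engine and table_name above (the chunk size is only an illustrative value):
# fast_executemany plus chunked writes keeps memory use and round-trips bounded
df.to_sql(table_name, engine, index=False, if_exists='append',
          schema='dbo', chunksize=10000)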

how to create a table in SQLite3 from imported Excel data in python?

In my code, I am importing data from an Excel file into an SQLite database using Python.
It doesn't give any error, but it turns every Excel column name into a table.
I have multiple Excel files with the same data structure, each containing 40K rows and 52 columns.
When I import these files into the SQLite database with the Python code below, it creates one table per column header name.
import sqlite3
import pandas as pd

filename = gui_fname()
con = sqlite3.connect("cps.db")
wb = pd.read_excel(filename, sheet_name='Sheet2')
for sheet in wb:
    wb[sheet].to_sql(sheet, con, index=False, if_exists='append')
con.commit()
con.close()
It should create a single table named after the sheet I am importing.
I did some hit and trial and found a fix:
I just put con.commit() inside the for loop and it works as required, but I don't get the logic.
I would appreciate it if anyone could explain this to me.
import sqlite3
import pandas as pd

filename = gui_fname()
con = sqlite3.connect("cps.db")
wb = pd.read_excel(filename, sheet_name='Sheet2')
for sheet in wb:
    wb[sheet].to_sql(sheet, con, index=False, if_exists='append')
    con.commit()
con.close()
import sqlite3
import pandas as pd

def import_excel_to_sqlite_db(excelFile):
    # read_excel with a single sheet name returns one DataFrame
    df = pd.read_excel(excelFile)
    con = sqlite3.connect("SQLite.db")
    cur = con.cursor()
    # write the whole DataFrame into a single table
    df.to_sql("TableName", con, if_exists="append", index=False)
    con.commit()
    # read back to verify the insert
    results = cur.execute("Select * from TableName").fetchall()
    print(pd.DataFrame(results, columns=df.columns))
    cur.close()
    con.close()
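As for why the original loop produced one table per column: pd.read_excel(filename, sheet_name='Sheet2') returns a single DataFrame, and iterating over a DataFrame yields its column labels, so each wb[sheet].to_sql(sheet, ...) writes one column under the column's name. A minimal sketch of the two usual patterns (the file and sheet names are placeholders):
import sqlite3
import pandas as pd

filename = "data.xlsx"          # placeholder; the question uses gui_fname()
con = sqlite3.connect("cps.db")

# one sheet -> one table named after the sheet
df = pd.read_excel(filename, sheet_name='Sheet2')
df.to_sql('Sheet2', con, index=False, if_exists='append')

# all sheets -> one table per sheet (sheet_name=None returns a dict of DataFrames)
for name, sheet_df in pd.read_excel(filename, sheet_name=None).items():
    sheet_df.to_sql(name, con, index=False, if_exists='append')

con.commit()
con.close()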

insert using pandas to_sql() missing data into clickhouse db

It's my first time using SQLAlchemy and pandas to insert some data into a ClickHouse db.
When I insert the same data with the ClickHouse CLI it works fine, but when I do it through SQLAlchemy, one row ends up missing and I don't know why.
Have I done something wrong?
import pandas as pd
from sqlalchemy import create_engine, MetaData
# make_session, uri and df come from the rest of the setup (elided here)

# created the dataframe
engine = create_engine(uri)
session = make_session(engine)
metadata = MetaData(bind=engine)
metadata.reflect(bind=engine)
conn = engine.connect()
df.to_sql('test', conn, if_exists='append', index=False)
Let's try this way:
import pandas as pd
from infi.clickhouse_orm.engines import Memory
from infi.clickhouse_orm.fields import UInt16Field, StringField
from infi.clickhouse_orm.models import Model
from sqlalchemy import create_engine
# define the ClickHouse table schema
class Test_Humans(Model):
    year = UInt16Field()
    first_name = StringField()

    engine = Memory()

engine = create_engine('clickhouse://default:@localhost/test')

# create table
with engine.connect() as conn:
    conn.connection.create_table(Test_Humans)  # https://github.com/Infinidat/infi.clickhouse_orm/blob/master/src/infi/clickhouse_orm/database.py#L142

pdf = pd.DataFrame.from_records([
    {'year': 1994, 'first_name': 'Vova'},
    {'year': 1995, 'first_name': 'Anja'},
    {'year': 1996, 'first_name': 'Vasja'},
    {'year': 1997, 'first_name': 'Petja'},
    # ! sqlalchemy-clickhouse ignores the last item so add fake one
    {}
])
pdf.to_sql('test_humans', engine, if_exists='append', index=False)
Take into account that sqlalchemy-clickhouse ignores the last item, so add a fake one (see the source code and related issue 10).
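To check on which side the row disappears, it can help to count the rows right after the write; a small sketch, reusing the engine and pdf from above (the fake padding row is excluded from the comparison):
import pandas as pd

# compare what was written with what ClickHouse reports
check = pd.read_sql('SELECT count(*) AS n FROM test_humans', engine)
print(len(pdf) - 1, int(check['n'][0]))   # -1 for the fake padding row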
