I'm trying to convert my python script to EXE.
What the script does is a basic analysis of an excel file, and generates a report in pdf.
Also in the script, I create a png file, then reload it to the pdf through the script.
I'm trying to convert the py file to EXE but it doesn't work :(
the script (works great as py file):
import pandas as pd
import os
from pandasql import sqldf
from datetime import datetime
import numpy as nu
from tkinter import *
import tkinter as tk
from fpdf import FPDF
import matplotlib.pyplot as plt
def start_gui(root):
myLabel = Label(root, text='Hi! Here you can output the sessions report').grid(row=0, column=0)
start_button = Button(root, text='Produce Report', padx=30, pady=20, command=main, fg='blue').grid(row=50, column=0)
root.mainloop()
pass
def print_full_results(df):
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
print(df)
pd.reset_option('display.max_rows')
pd.reset_option('display.max_columns')
pass
def load_data(path):
df = pd.read_csv(path)
df = pd.DataFrame(df)
return df
def clean_raw_data(raw_data):
raw_data = raw_data.dropna(how='all') # Drop the rows where all elements are missing.
raw_data = raw_data.dropna(axis=1, how='all') # Drop the columns where all elements are missing.
raw_data = raw_data.reset_index() # Reset the indexes after droping rows
raw_data = raw_data.drop(columns=['index'])
raw_data = raw_data.rename(
columns={'Meeting ID': 'Meeting_ID', 'User Name': 'Admin_User_Name', 'Uzer Eam1l': 'Admin_Email',
'Has Zoom Rooms?': 'Has_Zoom_Rooms', 'Creation Time': 'Meeting_Creation_Time',
'Start Time': 'Meeting_Start_Time', 'End Time': 'Meeting_End_Time',
'Duration (Minutes)': 'Meeting_Duration_min', 'Ncmf (prjgjncl Ncmf)': 'User_Name',
'Usfr fncil': 'User_Email', 'Join Time': 'User_Join_Time', 'Leave Time': 'User_Leave_Time',
'Duration (Minutes).1': 'User_Duration_min'})
raw_data = convert_relevant_types(raw_data)
raw_data = fill_null_emails(raw_data)
return raw_data
def convert_relevant_types(db):
pd.options.mode.chained_assignment = None # default='warn'
# relevant columns (Meeting_Creation_Time,Meeting_Start_Time,Meeting_End_Time,User_Join_Time,User_Leave_Time): convert string to date
for i in range(len(db['Meeting_Start_Time'])):
creation_date = datetime.strptime(db['Meeting_Creation_Time'][i], '%m/%d/%y %H:%M')
start_date = datetime.strptime(db['Meeting_Start_Time'][i], '%m/%d/%y %H:%M')
end_date = datetime.strptime(db['Meeting_End_Time'][i], '%m/%d/%y %H:%M')
user_join_date = datetime.strptime(db['User_Join_Time'][i], '%m/%d/%y %H:%M')
user_leave_date = datetime.strptime(db['User_Leave_Time'][i], '%m/%d/%y %H:%M')
db['Meeting_Creation_Time'][i] = creation_date
db['Meeting_Start_Time'][i] = start_date
db['Meeting_End_Time'][i] = end_date
db['User_Join_Time'][i] = user_join_date
db['User_Leave_Time'][i] = user_leave_date
# relevant columns (Meeting_Duration_min,User_Duration_min): convert string to int
for i in range(len(db['Meeting_Duration_min'])):
db['Meeting_Duration_min'][i] = int(db['Meeting_Duration_min'][i])
db['User_Duration_min'][i] = int(db['User_Duration_min'][i])
return db
def fill_null_emails(db):
for i in range(len(db['User_Email'])):
if pd.isnull(db['User_Email'][i]):
db['User_Email'][i] = db['User_Name'][i] + ' Missing Mail'
return db
def pdff_space_down(pdf):
pdf.cell(0, 10, '', ln=1, align='L')
return pdf
def pdff_write(pdf, text, space=5, align='L'):
pdf.cell(0, space, text, ln=1, align='L')
return pdf
def pdff_write_table(pdf, data, spacing=1.5):
col_width = pdf.w / 4.5
row_height = pdf.font_size
for row in data:
for item in row:
pdf.cell(col_width, row_height * spacing,
txt=item, border=1)
pdf.ln(row_height * spacing)
return pdf
def create_pdf(today,min_date, max_date, sessions_num, total_cost, costs_table, num_of_users, avg_users_come):
pdf = FPDF(orientation='p', unit='mm', format='A4')
pdf.add_page()
pdf.set_font('Arial', size=10)
pdf.cell(0, 10, 'Date:{}'.format(today), ln=1, align='L')
pdf.set_font('times', 'B', size=24)
pdf.cell(0, 8, 'Home Assignment - Ziv Mor', ln=1, align='C')
pdf.set_font('times', size=18)
pdf.cell(0, 10, 'Zoom-Sessions Report (Automated by Python)', ln=1, align='C')
pdf.cell(0, 10, '({}'.format(min_date) + ' To {})'.format(max_date), ln=1, align='C')
pdf.set_font('times', 'U', size=15)
pdf = pdff_write(pdf, 'Sessions Analysis', space=20)
pdf.set_font('times', size=13)
pdf = pdff_write(pdf, 'Total Number of Sessions: {} (Team meetings are not include)'.format(sessions_num), space=15)
pdf.set_font('times', 'UB', size=13)
pdf.cell(0, 10, 'Number Of Sessions By Dates', ln=1.5, align='C')
pdf.image('sessions_by_day_plot.png', x=55, y=None, w=100, h=70, type='', link='')
pdf = pdff_space_down(pdf)
pdf.set_font('times', size=13)
pdf = pdff_write(pdf, 'Sessions Participants Segmentation:', space=10)
pdf = pdff_write_table(pdf, costs_table)
pdf.set_font('times', 'UB', size=13)
pdf.cell(0, 20, 'Sessions Total Cost: {} NIS'.format(total_cost), ln=1, align='C')
pdf.set_font('times', 'U', size=15)
pdf = pdff_write(pdf, 'Users Analysis', space=17)
pdf.set_font('times', size=13)
pdf = pdff_write(pdf, 'Total Number of Users Engaged: {}'.format(num_of_users), space=10)
pdf = pdff_write(pdf, 'The Average Frequency of Arrival of Each User : {} Sessions'.format(avg_users_come),
space=10)
pdf.output('Zoom Report_{}.pdf'.format(str(datetime.today()).replace(':', '.', 3)))
def main():
path = os.path.join(sys.path[0], 'participant sessions data.csv')
raw_data = load_data(path)
zoom_db = clean_raw_data(raw_data)
'''------------------------------SQL Queries---------------------------------'''
# todo asume פגישת צוות - not counted
question_1_query = 'Select date(Meeting_Start_Time)date, count(distinct Meeting_Start_Time)Num_Of_Sessions From zoom_db where Topic <>"פגישת צוות" Group by date(Meeting_Start_Time)'
answer_1_table = sqldf(question_1_query)
num_of_sessions = nu.sum(list(answer_1_table['Num_Of_Sessions']))
# count for each meeting the number of participants
question_2_query = 'Select Topic, Meeting_Start_Time, count(Distinct User_Email)num_of_Users From zoom_db Group by Meeting_Start_Time, Meeting_ID'
answer_2_table = sqldf(question_2_query)
# count for each user number of times the user arrived to session
# todo - mention I didnt concluded rows that user got in for less than 1 minute + there are a lot of users without mail so I assume for
question_3_query = 'select User_Email, count(*)num_of_arrivals from(Select User_Email, Meeting_Start_Time, Meeting_ID From zoom_db Where User_Duration_min <> 0 Group by User_Email, Meeting_ID , Meeting_Start_Time) group by User_Email Order by num_of_arrivals desc'
answer_3_table = sqldf(question_3_query)
# Calculate the avg times of arrival of users (Using the result of 3'rd question query #todo - asumming not conclud the host
participants_arrivals_list = list(answer_3_table['num_of_arrivals'])[1:]
avg_users_come = round((nu.average(participants_arrivals_list)), 2)
'''---------------------More Calculates for the report------------------------'''
# Calculate the intervals of dates
min_date_qu = sqldf('select min(date(Meeting_Start_Time)) from zoom_db')
min_date_qu = list(min_date_qu['min(date(Meeting_Start_Time))'])[0]
max_date_qu = sqldf('select max(date(Meeting_Start_Time)) from zoom_db')
max_date_qu = list(max_date_qu['max(date(Meeting_Start_Time))'])[0]
num_meetings0_5 = sqldf('select count(*) from answer_2_table where num_of_users<=5 and Topic <>"פגישת צוות"')
num_meetings0_5 = list(num_meetings0_5['count(*)'])[0]
num_meetings5_10 = sqldf(
'select count(*) from answer_2_table where num_of_users>5 and num_of_users<=10 and Topic <>"פגישת צוות"')
num_meetings5_10 = list(num_meetings5_10['count(*)'])[0]
num_meetings10_15 = sqldf(
'select count(*) from answer_2_table where num_of_users>10 and num_of_users<=15 and Topic <>"פגישת צוות"')
num_meetings10_15 = list(num_meetings10_15['count(*)'])[0]
num_meetings_15_plus = sqldf('select count(*) from answer_2_table where num_of_users>15 and Topic <>"פגישת צוות"')
num_meetings_15_plus = list(num_meetings_15_plus['count(*)'])[0]
total_cost = 50 * num_meetings0_5 + 100 * num_meetings5_10 + 150 * num_meetings10_15 + 200 * num_meetings_15_plus
costs_table = [['Session type', 'Number of sessions', 'Cost'],
['0-5 participants', str(num_meetings0_5), str(50 * num_meetings0_5)],
['5-10 participants', str(num_meetings5_10), str(100 * num_meetings5_10)],
['10-15 participants', str(num_meetings10_15), str(150 * num_meetings10_15)],
['15+ participants', str(num_meetings_15_plus), str(200 * num_meetings_15_plus)]]
sessions_by_day_plot = answer_1_table.plot.bar(x='date', y='Num_Of_Sessions', rot=80)
plt.savefig('sessions_by_day_plot.png')
num_of_users = sqldf('select count(*) From answer_3_table')
num_of_users = list(num_of_users['count(*)'])[0]
today = datetime.today().strftime("%b-%d-%Y")
'''----------------------------------Out-Put Results------------------------'''
create_pdf(today = today , max_date=max_date_qu, min_date=min_date_qu, sessions_num=num_of_sessions,
total_cost=total_cost, costs_table=costs_table, num_of_users=num_of_users, avg_users_come=avg_users_come)
writer = pd.ExcelWriter('Zoom Report_{}.xlsx'.format(str(datetime.today()).replace(':', '.', 3)))
(answer_2_table).to_excel(writer , sheet_name='Sessions Number of Participants')
(answer_3_table).to_excel(writer, sheet_name='Participants show-up')
writer.save()
'''---------------------Delete not relevant files------------------------'''
plot1_path = os.path.join(sys.path[0], 'sessions_by_day_plot.png')
os.remove(plot1_path)
exit()
if __name__ == '__main__':
root = Tk()
start_gui(root)
# main()
Related
i have a directory containing three files, years.csv, 2014.csv and 2015.csv. i want to plot a population pyramid graph for the two files but i want pandas to pick the dataframe from the years.csv with respect to the slider value.
my years.csv looks like, on the slider when i select 2014, from the code you can see, its an int that i convert into a string and append .csv to it. but all i want is that final string interpreted as df = pd.read_csv('2014.csv') so that i can be able to generate graphs of all the years as long as that file is in the directoy.
years
0
2014(2014.csv)
1
2015(2015.csv)
from dash import Dash, dcc, html, Input, Output
# import plotly.express as px
import plotly.graph_objects as gp
import pandas as pd
# df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/gapminderDataFiveYear.csv')
df = pd.read_csv('years.csv')
app = Dash(__name__)
app.layout = html.Div([
dcc.Graph(id='graph-with-slider'),
dcc.Slider(
df['year'].min(),
df['year'].max(),
step=None,
value=df['year'].min(),
marks={str(year): str(year) for year in df['year'].unique()},
id='year-slider'
)
])
#app.callback(
Output('graph-with-slider', 'figure'),
Input('year-slider', 'value'))
def update_figure(selected_year):
new_df = str(df[df.year == selected_year]) + ".csv"
print(new_df)
# fig = px.scatter(filtered_df, x="gdpPercap", y="lifeExp",
# size="pop", color="continent", hover_name="country",
# log_x=True, size_max=55)
y_age = new_df['Age']
x_M = new_df['Male']
x_F = new_df['Female'] * -1
# fig.update_layout(transition_duration=500)
# Creating instance of the figure
fig = gp.Figure()
# Adding Male data to the figure
fig.add_trace(gp.Bar(y= y_age, x = x_M,
name = 'Male',
orientation = 'h'))
# Adding Female data to the figure
fig.add_trace(gp.Bar(y = y_age, x = x_F,
name = 'Female', orientation = 'h'))
# Updating the layoutout for our graph
fig.update_layout(title = 'Population Pyramid of Uganda-2015',
title_font_size = 22, barmode = 'relative',
bargap = 0.0, bargroupgap = 0,
xaxis = dict(tickvals = [-600000, -400000, -200000,
0, 200000, 400000, 600000],
ticktext = ['6k', '4k', '2k', '0',
'2k', '4k', '6k'],
title = 'Population in Thousands',
title_font_size = 14)
)
# fig.show()
return fig
if __name__ == '__main__':
app.run_server(debug=True)
I am having an issue writing a dataframe to my django models.py.
The file is long, but is quite simple in its methodology:
-import modules
-create django database
-requests.get necessary data
-alter data some to fit my goals, save as df
-connect to django db and insert df
My models.py is the following:
from django.db import models
import requests
import pandas as pd
from datetime import timezone
from datetime import datetime
from datetime import date
from datetime import timedelta
import time
from django.conf import settings
from sqlalchemy.engine import create_engine
class cryptoData(models.Model):
coin = models.CharField(max_length=10)
asset_id = models.SmallIntegerField()
time = models.DateTimeField()
close = models.FloatField()
volume = models.BigIntegerField()
market_cap = models.FloatField()
reddit_posts = models.IntegerField()
reddit_comments = models.IntegerField()
tweets = models.IntegerField()
tweet_favorites = models.IntegerField()
social_volume = models.IntegerField()
lunarcrush_key = 'fakekey1234'
def top_coins():
lc_market = requests.get(
url = 'https://api.lunarcrush.com/v2?data=market&',
params = {
'key': lunarcrush_key,
}
)
all_coins = []
for entry in lc_market.json().get('data'):
coin = []
coin.append(entry.get('s'))
coin.append(entry.get('mc'))
all_coins.append(coin)
all_coins.sort(key = lambda x : x[1], reverse = True)
top_ten_coins = all_coins[:10]
return(top_ten_coins)
top_coins_lst = top_coins()
top_coin_names_lst = [x[0] for x in top_coins_lst]
def get_coin_data(key, coin, date_diff, start_date, end_date):
lc = requests.get(
url = 'https://api.lunarcrush.com/v2?data=assets&',
params = {
'key': lunarcrush_key,
'symbol': coin,
'interval': 'day',
'data_points': date_diff,
'start': int(start_date.replace(tzinfo=timezone.utc).timestamp()),
'end': int(end_date.replace(tzinfo=timezone.utc).timestamp())
}
)
metric_names = []
for entry in lc.json().get('data')[0].get('timeSeries'):
for key in entry:
metric_names.append(key) if key not in metric_names else metric_names
metrics_list = []
for entry in lc.json().get('data')[0].get('timeSeries'):
row_list = []
for key in entry:
row_list.append(entry.get(key))
metrics_list.append(row_list)
metrics_df = pd.DataFrame(metrics_list, columns = metric_names)
metrics_df['time'] = metrics_df['time'].apply(lambda x : datetime.utcfromtimestamp(x).strftime('%Y-%m-%d %H:%M:%S'))
metrics_df['coin'] = coin
cols = list(metrics_df)
cols.insert(0, cols.pop(cols.index('coin')))
metrics_df = metrics_df.loc[:, cols]
return(metrics_df)
def get_all_coins_data(coins_list):
appended_data = []
end_date = datetime.now()
start_date = end_date - timedelta(days = 700)
date_diff = (end_date - start_date).days
for coin in coins_list:
appended_data.append(get_coin_data(lunarcrush_key, coin, date_diff, start_date, end_date))
time.sleep(.1)
output = pd.concat(appended_data)
return(output)
df = get_all_coins_data(top_coin_names_lst)
focused_df = df[['coin', 'asset_id', 'time', 'close', 'volume', 'market_cap', 'reddit_posts', 'reddit_comments', 'tweets', 'tweet_favorites', 'social_volume']]
user = settings.DATABASES['default']['USER']
password = settings.DATABASES['default']['PASSWORD']
database_name = settings.DATABASES['default']['NAME']
database_url = 'sqlite://{user}:{password}#localhost:5432/{database_name}'.format(
user=user,
password=password,
database_name=database_name,
)
engine = create_engine(database_url, echo=False)
focused_df.to_sql(cryptoData, con=engine)
When I run the manage.py runserver command, I get the following error:
sqlalchemy.exc.ArgumentError: Invalid SQLite URL: sqlite://user:password#localhost:5432/C:\Users\user\Programming\django_crypto_v6\source\db.sqlite3
Valid SQLite URL forms are:
sqlite:///:memory: (or, sqlite://)
sqlite:///relative/path/to/file.db
sqlite:////absolute/path/to/file.db
I'm struggling to resolve this issue. Any thoughts?
you are using the wrong pattern for SQLite database_url
see the docs at https://docs.sqlalchemy.org/en/14/core/engines.html#sqlite
I have data from the same month over period of time and I trying to plot the mean by day of the motnh but I don´t know how to do it.
This is how the dataframe looks like
The main code to get the dataframe:
import requests
import pandas as pd
from bs4 import BeautifulSoup as bs
import matplotlib.pyplot as plt
from datetime import date, timedelta
from datetime import datetime
inicio = date(1973, 1, 1)
#inicio = date(2019, 2, 15)
#final = date(2000, 10, 10)
final = date(1974, 3, 1)
delta = timedelta(days=1)
años=[]
links=[]
while inicio <= final:
fechas=inicio.strftime("%Y-%m-%d")
#años.append(datetime.strptime(fechas, '%Y-%m-%d').date())
años.append(fechas)
url='http://weather.uwyo.edu/cgi-bin/sounding?region=samer&TYPE=TEXT%3ALIST&YEAR={}&MONTH={}&FROM={}12&TO={}12&STNM=80222'.format(fechas[0:4],fechas[5:7],fechas[8:10],fechas[8:10])
links.append(url)
inicio += delta
d = dict(zip(años, links))
df1=pd.DataFrame(list(d.items()), columns=['Fecha', 'url'])
df1.set_index('Fecha', inplace=True)
Enero=pd.DataFrame()
Febrero=pd.DataFrame()
for i in df1.index:
if i[5:7]=='01':
Enero = Enero.append(df1.loc[i], ignore_index=False)
elif i[5:7]=='02':
Febrero = Febrero.append(df1.loc[i], ignore_index=False)
labels = ['PRES', 'HGHT', 'TEMP', 'DWPT', 'RELH', 'MIXR', 'DRCT', 'SKNT', 'THTA', 'THTE', 'THTV']
def reques(url):
try:
results = []
peticion=requests.get(url)
soup=bs(peticion.content, 'lxml')
pre = (soup.select_one('pre')).text
for line in pre.split('\n')[4:-1]:
#print (line)
if '--' not in line:
row = [line[i:i+7].strip() for i in range(0, len(line), 7)]
results.append(row)
else:
pass
df5=pd.DataFrame.from_records(results, columns=labels)
#return x
return df5
except AttributeError:
pass
SuperDF = pd.DataFrame()
SuperDF = pd.DataFrame(columns=labels)
startTime = datetime.now()
sin_datos=[]
for i in Febrero['url']:
try:
x=reques(i)
df2=x
y=str(df1[df1['url']==i].index.values)
df2.index = [y] * len(x)
SuperDF=SuperDF.append(x)
except TypeError:
sin_datos.append(df1[df1['url']==i].index.values)
print (df1[df1['url']==i].index.values)
SuperDF.index= SuperDF.index.map(lambda x: x.lstrip("['").rstrip("]''"))
SuperDF.index = pd.to_datetime(SuperDF.index)
SuperDF=SuperDF.apply(pd.to_numeric)
SuperDF
I've been trying to do it whit this
import seaborn as sns
SuperDF = SuperDF[(SuperDF['TEMP']==0)]
ax = SuperDF.loc['02', 'RELH'].plot(marker='o', linestyle='-')
ax.set_ylabel('RELH');
but I got this error
KeyError: '02'
It works when i pass the year but i need the mean by day for the month. Any help will be appreciate.
This is what I need
there is a file I tried to import and safe as pandas df. At a first sight looks like it's already columns and rows ordered, but finally I had to do a bunch of stuff to create pandas df. Could you please check if there is much faster way to manage it?
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data'
My way of doing it is:
import requests
import pandas as pd
r = requests.get(url)
file = r.text
step_1 = file.split('\n')
for n in range(len(step_1)): # remove empty strings
if bool(step_1[n]) == False:
del(step_1[n])
step_2 = [i.split('\t') for i in step_1]
cars_names = [i[1] for i in step_2]
step_3 = [i[0].split(' ') for i in step_2]
for e in range(len(step_3)): # remove empty strings in each sublist
step_3[e] = [item for item in step_3[e] if item != '']
mpg = [i[0] for i in step_3]
cylinders = [i[1] for i in step_3]
disp = [i[2] for i in step_3]
horsepower = [i[3] for i in step_3]
weight = [i[4] for i in step_3]
acce = [i[5] for i in step_3]
year = [i[6] for i in step_3]
origin = [i[7] for i in step_3]
list_cols = [cars_names, mpg, cylinders, disp, horsepower, weight, acce, year, origin]
# list_labels written manually:
list_labels = ['car name', 'mpg', 'cylinders', 'displacement', 'horsepower', 'weight', 'acceleration', 'model year', 'origin']
zipped = list(zip(list_labels, list_cols))
data = dict(zipped)
df = pd.DataFrame(data)
When you replaced \t to blankspace, you can use read_csv to read it. But you need to wrap up your text, because the first parameter in read_csv is filepath_or_buffer which needs object with a read() method (such as a file handle or StringIO). Then your question can be transform to read_csv doesn't read the column names correctly on this file?
import requests
import pandas as pd
from io import StringIO
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data'
r = requests.get(url)
file = r.text.replace("\t"," ")
# list_labels written manually:
list_labels = ['mpg', 'cylinders', 'displacement', 'horsepower', 'weight', 'acceleration', 'model year', 'origin','car name']
df = pd.read_csv(StringIO(file),sep="\s+",header = None,names=list_labels)
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
print(df)
I am trying to write a function to write and read transaction details to/from a .h5 file. I want to effectively use one file to store some transaction details, and when necessary, derive the details. Here's my code:
import h5py
import numpy as np
import pandas as pd
from datetime import datetime
from os import listdir
from pandas import HDFStore
def maintainLedger(mode, tick, lastBuyy = 0, lastSell = 0, quan = 0, prof = 0):
"""THIS FUNCTION WRITES AND READS TRANSACTION DETAILS.
mode = 0 - IF FILE EXITS, READ FILE
mode = 1 - IF FILE EXITS, APPEND TO FILE"""
# CHECK IF LEDGER FILE EXISTS, IF NOT CREATE A LEDGER FILE FOR THE FIRST TIME
path = r'ledger'
suff = r'h5'
flie = listdir(path)
flie = [item for item in flie if item.endswith(suff)]
if len(flie) == 0:
HDF5Data = HDFStore('ledger/ledger.h5')
# GENERATE NEW VALUES OF DATE/TIME
mi = int(datetime.now().minute)
ho = int(datetime.now().hour)
da = int(datetime.now().day)
we = int(datetime.now().isocalendar()[1])
mo = int(datetime.now().month)
ye = int(datetime.now().year)
newwData = np.array([mode, mi, ho, da, we, mo, ye, tick, lastBuyy, lastSell, quan, prof]).reshape(1, 12)
newwData = pd.DataFrame(newwData, columns = ['mode', 'mi', 'ho', 'da', 'we', 'mo', 'ye', 'tick', 'laBu', 'laSe', 'quan', 'prof'])
HDF5Data.put('data', newwData, format = 'table', data_columns = True)
HDF5Data.close()
elif len(flie) == 1:
if mode == 0:
# READ PREVIOUSLY SAVED DATA AS PANDAS DATAFRAME
readData = pd.read_hdf('ledger/ledger.h5', mode = 'r')
# DO SOMETHING...
elif mode == 1:
# GENERATE NEW VALUES OF DATE/TIME
mi = int(datetime.now().minute)
ho = int(datetime.now().hour)
da = int(datetime.now().day)
we = int(datetime.now().isocalendar()[1])
mo = int(datetime.now().month)
ye = int(datetime.now().year)
# GATHER NEW DATA INTO NUMPY ARRAY AND CONVERT TO PANDAS DATAFRAME
newwData = np.array([mode, mi, ho, da, we, mo, ye, tick, lastBuyy, lastSell, quan, prof]).reshape(1, 12)
newwData = pd.DataFrame(newwData, columns = ['mode', 'mi', 'ho', 'da', 'we', 'mo', 'ye', 'tick', 'laBu', 'laSe', 'quan', 'prof'])
# READ PREVIOUSLY SAVED DATA AS PANDAS DATAFRAME AND APPEND NEW DATA
readData = pd.read_hdf('ledger/ledger.h5', mode = 'a')
readData.append('data', newwData)
tempData = pd.read_hdf('ledger/ledger.h5', mode = 'r')
print(tempData)
else:
print('Please check input data for errors!')
if __name__ == '__main__':
maintainLedger(1, "AAPL")
When I run the code, I am getting the following error:
TypeError: cannot concatenate object of type "<class 'str'>"; only pd.Series, pd.DataFrame, and pd.Panel (deprecated) objs are valid
I have tried looking for a solution, and a quick search led me to this, which didn't solve my problem. Is there something I am doing wrong? Any advice would be appreciated.
import h5py
import numpy as np
import pandas as pd
from datetime import datetime
from os import listdir
from pandas import HDFStore
def maintainLedger(mode, tick = 'QUERY', lastBuyy = 0, lastSell = 0, quan = 0, prof = 0):
"""THIS FUNCTION WRITES AND READS TRANSACTION DETAILS.
mode = 0 - IF FILE EXITS, READ FILE
mode = 1 - IF FILE EXITS, APPEND TO FILE"""
# CHECK IF LEDGER FILE EXISTS, IF NOT CREATE A LEDGER FILE FOR THE FIRST TIME
path = r'ledger'
suff = r'h5'
flie = listdir(path)
flie = [item for item in flie if item.endswith(suff)]
if len(flie) == 0:
# GENERATE NEW VALUES OF DATE/TIME
mi = int(datetime.now().minute)
ho = int(datetime.now().hour)
da = int(datetime.now().day)
we = int(datetime.now().isocalendar()[1])
mo = int(datetime.now().month)
ye = int(datetime.now().year)
# GATHER NEW DATA INTO NUMPY ARRAY AND CONVERT TO PANDAS DATAFRAME
newwData = np.array([mode, mi, ho, da, we, mo, ye, tick, lastBuyy, lastSell, quan, prof]).reshape(1, 12)
newwData = pd.DataFrame(newwData, columns = ['mode', 'mi', 'ho', 'da', 'we', 'mo', 'ye', 'tick', 'laBu', 'laSe', 'quan', 'prof'])
# SAVE ALL DATA INTO .H5 FORMAT
HDF5Data = HDFStore('ledger/ledger.h5')
HDF5Data.put('data', newwData, format = 'table', data_columns = True)
HDF5Data.close()
elif len(flie) == 1:
if mode == 0:
"""THIS OPTION ENABLES CODE TO READ DATA."""
# READ PREVIOUSLY SAVED DATA AS PANDAS DATAFRAME
readData = pd.read_hdf('ledger/ledger.h5', mode = 'r')
# DO SOMETHING...
print(readData)
elif mode == 1:
"""THIS OPTION ENABLES CODE TO APPEND DATA."""
# GENERATE NEW VALUES OF DATE/TIME
mi = int(datetime.now().minute)
ho = int(datetime.now().hour)
da = int(datetime.now().day)
we = int(datetime.now().isocalendar()[1])
mo = int(datetime.now().month)
ye = int(datetime.now().year)
# GATHER NEW DATA INTO NUMPY ARRAY AND CONVERT TO PANDAS DATAFRAME
newwData = np.array([mode, mi, ho, da, we, mo, ye, tick, lastBuyy, lastSell, quan, prof]).reshape(1, 12)
newwData = pd.DataFrame(newwData, columns = ['mode', 'mi', 'ho', 'da', 'we', 'mo', 'ye', 'tick', 'laBu', 'laSe', 'quan', 'prof'])
# READ PREVIOUSLY SAVED DATA AS PANDAS DATAFRAME AND APPEND NEW DATA
readData = pd.read_hdf('ledger/ledger.h5', mode = 'r')
readData = readData.append(newwData)
# SAVE ALL DATA INTO .H5 FORMAT
HDF5Data = HDFStore('ledger/ledger.h5')
HDF5Data.put('data', readData, format = 'table', data_columns = True)
HDF5Data.close()
else:
print('Please check input data for errors!')
if __name__ == '__main__':
maintainLedger(1, 'MSFT')