Uniting two dataframes (pandas) into one excel file and save it using tkinter filedialog

Uniting two dataframes (pandas) into one excel file and save it using tkinter filedialog - excel

I have a SQLite database with two tables. One for clients and another one for processes of each client. I need a function to export a pandas dataframe using filedialog. If I use XslsWriter, the Excel file is automatically saved where the program is running, but I wish the user to choose his own location.
I have tried using Xlsxwriter, which saves in the same directory where the program is running:
def exporta_tabela(self):
self.conecta_db()
self.cursor.execute("""SELECT * FROM clientes""")
tabelaCli = self.cursor.fetchall()
tabelaCli = pd.DataFrame(tabelaCli, columns=['COD', 'CPF', 'Nome', 'Sobrenome', 'Telefone', 'Estado', 'Cidade','Bairro', 'Logradouro', 'Num.', 'Comp.', 'Area', 'RegistradoEm'])
writer = pd.ExcelWriter('clientes_processos.xlsx', engine='xlsxwriter')
self.cursor.execute("""SELECT * FROM processos """)
tabelaProc = self.cursor.fetchall()
tabelaProc = pd.DataFrame(tabelaProc, columns=['ID', 'Adv_Resp', 'Tipo', 'Processo', 'Status', 'Cliente_COD'])
tabelaCli.to_excel(writer, sheet_name='Clientes')
tabelaProc.to_excel(writer, sheet_name='Processos')
writer.save()
self.conn.commit()
self.desconecta_db()
=====================================================================
And i have also tried without xlsxWriter, but it saves just one or the other sheet, and i need both:
def exporta_tabela(self):
self.conecta_db()
self.cursor.execute("""SELECT * FROM clientes""")
tabelaCli = self.cursor.fetchall()
tabelaCli = pd.DataFrame(tabelaCli, columns=['COD', 'CPF', 'Nome', 'Sobrenome', 'Telefone', 'Estado', 'Cidade','Bairro', 'Logradouro', 'Num.', 'Comp.', 'Area', 'RegistradoEm'])
self.cursor.execute("""SELECT * FROM processos """)
tabelaProc = self.cursor.fetchall()
tabelaProc = pd.DataFrame(tabelaProc, columns=['ID', 'Adv_Resp', 'Tipo', 'Processo', 'Status', 'Cliente_COD'])
with filedialog.asksaveasfile(mode='w', defaultextension='.xlsx') as file:
tabelaCli.to_excel(file.name, sheet_name='Clientes')
tabelaProc.to_excel(file.name, sheet_name='Processos')
self.conn.commit()
self.desconecta_db()

Got it to work as follows:
def exporta_tabela(self):
self.conecta_db()
self.cursor.execute("""SELECT * FROM clientes""")
tabelaCli = self.cursor.fetchall()
self.cursor.execute("""SELECT * FROM processos """)
tabelaProc = self.cursor.fetchall()
tabelaCli = pd.DataFrame(tabelaCli, columns=['COD', 'CPF', 'Nome', 'Sobrenome', 'Telefone', 'Estado', 'Cidade',
'Bairro', 'Logradouro', 'Num.', 'Comp.', 'Area', 'RegistradoEm'])
tabelaProc = pd.DataFrame(tabelaProc, columns=['ID', 'Adv_Resp', 'Tipo', 'Processo', 'Status', 'Cliente_COD'])
outpath = filedialog.asksaveasfile(mode='wb', defaultextension='.xlsx')
with pd.ExcelWriter(outpath, engine='xlsxwriter') as writer:
tabelaCli.to_excel(writer, sheet_name='Clientes')
tabelaProc.to_excel(writer, sheet_name='Processos')
self.conn.commit()
self.desconecta_db()

Related

Function to parse a dataframe into a SQL table

I am trying to create a function that will accept a dataframe and will parse that dataframe into a sql server table. I am stuck as to what needs go in the select statement below the insert query.
df- dataframe
desttable - destination table that needs to be parsed.
tablecols - An array of the table columns for the table
# Insert DataFrame to Table
def InsertintoDb(self, df, desttable, tablecols):
tablecolnames = ','.join(tablecols)
qmark = ['?' for s in tablecols]
allqmarks = ','.join(qmark)
#rowappendcolname = ','.join(['row.' + s for s in tablecols])
for index, row in df.iterrows():
cursor.execute(
'''INSERT INTO [Py_Test].[dbo].''' + desttable + ''' ( ''' + tablecolnames + ''')VALUES (''' + allqmarks + ''')''',
)
self.conn.commit()
Any help is much appreciated.

As suggested by the gentleman in the comment, I was able to do it using df.to_sql . Here is the working code -
class DbOps:
def __init__(self):
self.username = ''
self.password = ''
self.ipaddress = 'localhost'
# self.port = 5439
self.dbname = ''
# A long string that contains the necessary Postgres login information
self.engine = sqlalchemy.create_engine(
f"mssql+pyodbc://{self.username}:%s#{self.ipaddress}/{self.dbname}?driver=SQL+Server+Native+Client+11.0" % urlquote(f'
{self.password }'))
def InsertintoDb(self, df, desttable, tablecols):
df.to_sql(desttable, self.engine, index=False, if_exists='append')

Inserting pandas dataframe into django model

I am having an issue writing a dataframe to my django models.py.
The file is long, but is quite simple in its methodology:
-import modules
-create django database
-requests.get necessary data
-alter data some to fit my goals, save as df
-connect to django db and insert df
My models.py is the following:
from django.db import models
import requests
import pandas as pd
from datetime import timezone
from datetime import datetime
from datetime import date
from datetime import timedelta
import time
from django.conf import settings
from sqlalchemy.engine import create_engine
class cryptoData(models.Model):
coin = models.CharField(max_length=10)
asset_id = models.SmallIntegerField()
time = models.DateTimeField()
close = models.FloatField()
volume = models.BigIntegerField()
market_cap = models.FloatField()
reddit_posts = models.IntegerField()
reddit_comments = models.IntegerField()
tweets = models.IntegerField()
tweet_favorites = models.IntegerField()
social_volume = models.IntegerField()
lunarcrush_key = 'fakekey1234'
def top_coins():
lc_market = requests.get(
url = 'https://api.lunarcrush.com/v2?data=market&',
params = {
'key': lunarcrush_key,
}
)
all_coins = []
for entry in lc_market.json().get('data'):
coin = []
coin.append(entry.get('s'))
coin.append(entry.get('mc'))
all_coins.append(coin)
all_coins.sort(key = lambda x : x[1], reverse = True)
top_ten_coins = all_coins[:10]
return(top_ten_coins)
top_coins_lst = top_coins()
top_coin_names_lst = [x[0] for x in top_coins_lst]
def get_coin_data(key, coin, date_diff, start_date, end_date):
lc = requests.get(
url = 'https://api.lunarcrush.com/v2?data=assets&',
params = {
'key': lunarcrush_key,
'symbol': coin,
'interval': 'day',
'data_points': date_diff,
'start': int(start_date.replace(tzinfo=timezone.utc).timestamp()),
'end': int(end_date.replace(tzinfo=timezone.utc).timestamp())
}
)
metric_names = []
for entry in lc.json().get('data')[0].get('timeSeries'):
for key in entry:
metric_names.append(key) if key not in metric_names else metric_names
metrics_list = []
for entry in lc.json().get('data')[0].get('timeSeries'):
row_list = []
for key in entry:
row_list.append(entry.get(key))
metrics_list.append(row_list)
metrics_df = pd.DataFrame(metrics_list, columns = metric_names)
metrics_df['time'] = metrics_df['time'].apply(lambda x : datetime.utcfromtimestamp(x).strftime('%Y-%m-%d %H:%M:%S'))
metrics_df['coin'] = coin
cols = list(metrics_df)
cols.insert(0, cols.pop(cols.index('coin')))
metrics_df = metrics_df.loc[:, cols]
return(metrics_df)
def get_all_coins_data(coins_list):
appended_data = []
end_date = datetime.now()
start_date = end_date - timedelta(days = 700)
date_diff = (end_date - start_date).days
for coin in coins_list:
appended_data.append(get_coin_data(lunarcrush_key, coin, date_diff, start_date, end_date))
time.sleep(.1)
output = pd.concat(appended_data)
return(output)
df = get_all_coins_data(top_coin_names_lst)
focused_df = df[['coin', 'asset_id', 'time', 'close', 'volume', 'market_cap', 'reddit_posts', 'reddit_comments', 'tweets', 'tweet_favorites', 'social_volume']]
user = settings.DATABASES['default']['USER']
password = settings.DATABASES['default']['PASSWORD']
database_name = settings.DATABASES['default']['NAME']
database_url = 'sqlite://{user}:{password}#localhost:5432/{database_name}'.format(
user=user,
password=password,
database_name=database_name,
)
engine = create_engine(database_url, echo=False)
focused_df.to_sql(cryptoData, con=engine)
When I run the manage.py runserver command, I get the following error:
sqlalchemy.exc.ArgumentError: Invalid SQLite URL: sqlite://user:password#localhost:5432/C:\Users\user\Programming\django_crypto_v6\source\db.sqlite3
Valid SQLite URL forms are:
sqlite:///:memory: (or, sqlite://)
sqlite:///relative/path/to/file.db
sqlite:////absolute/path/to/file.db
I'm struggling to resolve this issue. Any thoughts?

you are using the wrong pattern for SQLite database_url
see the docs at https://docs.sqlalchemy.org/en/14/core/engines.html#sqlite

failed to execute script python exe

I'm trying to convert my python script to EXE.
What the script does is a basic analysis of an excel file, and generates a report in pdf.
Also in the script, I create a png file, then reload it to the pdf through the script.
I'm trying to convert the py file to EXE but it doesn't work :(
the script (works great as py file):
import pandas as pd
import os
from pandasql import sqldf
from datetime import datetime
import numpy as nu
from tkinter import *
import tkinter as tk
from fpdf import FPDF
import matplotlib.pyplot as plt
def start_gui(root):
myLabel = Label(root, text='Hi! Here you can output the sessions report').grid(row=0, column=0)
start_button = Button(root, text='Produce Report', padx=30, pady=20, command=main, fg='blue').grid(row=50, column=0)
root.mainloop()
pass
def print_full_results(df):
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
print(df)
pd.reset_option('display.max_rows')
pd.reset_option('display.max_columns')
pass
def load_data(path):
df = pd.read_csv(path)
df = pd.DataFrame(df)
return df
def clean_raw_data(raw_data):
raw_data = raw_data.dropna(how='all') # Drop the rows where all elements are missing.
raw_data = raw_data.dropna(axis=1, how='all') # Drop the columns where all elements are missing.
raw_data = raw_data.reset_index() # Reset the indexes after droping rows
raw_data = raw_data.drop(columns=['index'])
raw_data = raw_data.rename(
columns={'Meeting ID': 'Meeting_ID', 'User Name': 'Admin_User_Name', 'Uzer Eam1l': 'Admin_Email',
'Has Zoom Rooms?': 'Has_Zoom_Rooms', 'Creation Time': 'Meeting_Creation_Time',
'Start Time': 'Meeting_Start_Time', 'End Time': 'Meeting_End_Time',
'Duration (Minutes)': 'Meeting_Duration_min', 'Ncmf (prjgjncl Ncmf)': 'User_Name',
'Usfr fncil': 'User_Email', 'Join Time': 'User_Join_Time', 'Leave Time': 'User_Leave_Time',
'Duration (Minutes).1': 'User_Duration_min'})
raw_data = convert_relevant_types(raw_data)
raw_data = fill_null_emails(raw_data)
return raw_data
def convert_relevant_types(db):
pd.options.mode.chained_assignment = None # default='warn'
# relevant columns (Meeting_Creation_Time,Meeting_Start_Time,Meeting_End_Time,User_Join_Time,User_Leave_Time): convert string to date
for i in range(len(db['Meeting_Start_Time'])):
creation_date = datetime.strptime(db['Meeting_Creation_Time'][i], '%m/%d/%y %H:%M')
start_date = datetime.strptime(db['Meeting_Start_Time'][i], '%m/%d/%y %H:%M')
end_date = datetime.strptime(db['Meeting_End_Time'][i], '%m/%d/%y %H:%M')
user_join_date = datetime.strptime(db['User_Join_Time'][i], '%m/%d/%y %H:%M')
user_leave_date = datetime.strptime(db['User_Leave_Time'][i], '%m/%d/%y %H:%M')
db['Meeting_Creation_Time'][i] = creation_date
db['Meeting_Start_Time'][i] = start_date
db['Meeting_End_Time'][i] = end_date
db['User_Join_Time'][i] = user_join_date
db['User_Leave_Time'][i] = user_leave_date
# relevant columns (Meeting_Duration_min,User_Duration_min): convert string to int
for i in range(len(db['Meeting_Duration_min'])):
db['Meeting_Duration_min'][i] = int(db['Meeting_Duration_min'][i])
db['User_Duration_min'][i] = int(db['User_Duration_min'][i])
return db
def fill_null_emails(db):
for i in range(len(db['User_Email'])):
if pd.isnull(db['User_Email'][i]):
db['User_Email'][i] = db['User_Name'][i] + ' Missing Mail'
return db
def pdff_space_down(pdf):
pdf.cell(0, 10, '', ln=1, align='L')
return pdf
def pdff_write(pdf, text, space=5, align='L'):
pdf.cell(0, space, text, ln=1, align='L')
return pdf
def pdff_write_table(pdf, data, spacing=1.5):
col_width = pdf.w / 4.5
row_height = pdf.font_size
for row in data:
for item in row:
pdf.cell(col_width, row_height * spacing,
txt=item, border=1)
pdf.ln(row_height * spacing)
return pdf
def create_pdf(today,min_date, max_date, sessions_num, total_cost, costs_table, num_of_users, avg_users_come):
pdf = FPDF(orientation='p', unit='mm', format='A4')
pdf.add_page()
pdf.set_font('Arial', size=10)
pdf.cell(0, 10, 'Date:{}'.format(today), ln=1, align='L')
pdf.set_font('times', 'B', size=24)
pdf.cell(0, 8, 'Home Assignment - Ziv Mor', ln=1, align='C')
pdf.set_font('times', size=18)
pdf.cell(0, 10, 'Zoom-Sessions Report (Automated by Python)', ln=1, align='C')
pdf.cell(0, 10, '({}'.format(min_date) + ' To {})'.format(max_date), ln=1, align='C')
pdf.set_font('times', 'U', size=15)
pdf = pdff_write(pdf, 'Sessions Analysis', space=20)
pdf.set_font('times', size=13)
pdf = pdff_write(pdf, 'Total Number of Sessions: {} (Team meetings are not include)'.format(sessions_num), space=15)
pdf.set_font('times', 'UB', size=13)
pdf.cell(0, 10, 'Number Of Sessions By Dates', ln=1.5, align='C')
pdf.image('sessions_by_day_plot.png', x=55, y=None, w=100, h=70, type='', link='')
pdf = pdff_space_down(pdf)
pdf.set_font('times', size=13)
pdf = pdff_write(pdf, 'Sessions Participants Segmentation:', space=10)
pdf = pdff_write_table(pdf, costs_table)
pdf.set_font('times', 'UB', size=13)
pdf.cell(0, 20, 'Sessions Total Cost: {} NIS'.format(total_cost), ln=1, align='C')
pdf.set_font('times', 'U', size=15)
pdf = pdff_write(pdf, 'Users Analysis', space=17)
pdf.set_font('times', size=13)
pdf = pdff_write(pdf, 'Total Number of Users Engaged: {}'.format(num_of_users), space=10)
pdf = pdff_write(pdf, 'The Average Frequency of Arrival of Each User : {} Sessions'.format(avg_users_come),
space=10)
pdf.output('Zoom Report_{}.pdf'.format(str(datetime.today()).replace(':', '.', 3)))
def main():
path = os.path.join(sys.path[0], 'participant sessions data.csv')
raw_data = load_data(path)
zoom_db = clean_raw_data(raw_data)
'''------------------------------SQL Queries---------------------------------'''
# todo asume פגישת צוות - not counted
question_1_query = 'Select date(Meeting_Start_Time)date, count(distinct Meeting_Start_Time)Num_Of_Sessions From zoom_db where Topic <>"פגישת צוות" Group by date(Meeting_Start_Time)'
answer_1_table = sqldf(question_1_query)
num_of_sessions = nu.sum(list(answer_1_table['Num_Of_Sessions']))
# count for each meeting the number of participants
question_2_query = 'Select Topic, Meeting_Start_Time, count(Distinct User_Email)num_of_Users From zoom_db Group by Meeting_Start_Time, Meeting_ID'
answer_2_table = sqldf(question_2_query)
# count for each user number of times the user arrived to session
# todo - mention I didnt concluded rows that user got in for less than 1 minute + there are a lot of users without mail so I assume for
question_3_query = 'select User_Email, count(*)num_of_arrivals from(Select User_Email, Meeting_Start_Time, Meeting_ID From zoom_db Where User_Duration_min <> 0 Group by User_Email, Meeting_ID , Meeting_Start_Time) group by User_Email Order by num_of_arrivals desc'
answer_3_table = sqldf(question_3_query)
# Calculate the avg times of arrival of users (Using the result of 3'rd question query #todo - asumming not conclud the host
participants_arrivals_list = list(answer_3_table['num_of_arrivals'])[1:]
avg_users_come = round((nu.average(participants_arrivals_list)), 2)
'''---------------------More Calculates for the report------------------------'''
# Calculate the intervals of dates
min_date_qu = sqldf('select min(date(Meeting_Start_Time)) from zoom_db')
min_date_qu = list(min_date_qu['min(date(Meeting_Start_Time))'])[0]
max_date_qu = sqldf('select max(date(Meeting_Start_Time)) from zoom_db')
max_date_qu = list(max_date_qu['max(date(Meeting_Start_Time))'])[0]
num_meetings0_5 = sqldf('select count(*) from answer_2_table where num_of_users<=5 and Topic <>"פגישת צוות"')
num_meetings0_5 = list(num_meetings0_5['count(*)'])[0]
num_meetings5_10 = sqldf(
'select count(*) from answer_2_table where num_of_users>5 and num_of_users<=10 and Topic <>"פגישת צוות"')
num_meetings5_10 = list(num_meetings5_10['count(*)'])[0]
num_meetings10_15 = sqldf(
'select count(*) from answer_2_table where num_of_users>10 and num_of_users<=15 and Topic <>"פגישת צוות"')
num_meetings10_15 = list(num_meetings10_15['count(*)'])[0]
num_meetings_15_plus = sqldf('select count(*) from answer_2_table where num_of_users>15 and Topic <>"פגישת צוות"')
num_meetings_15_plus = list(num_meetings_15_plus['count(*)'])[0]
total_cost = 50 * num_meetings0_5 + 100 * num_meetings5_10 + 150 * num_meetings10_15 + 200 * num_meetings_15_plus
costs_table = [['Session type', 'Number of sessions', 'Cost'],
['0-5 participants', str(num_meetings0_5), str(50 * num_meetings0_5)],
['5-10 participants', str(num_meetings5_10), str(100 * num_meetings5_10)],
['10-15 participants', str(num_meetings10_15), str(150 * num_meetings10_15)],
['15+ participants', str(num_meetings_15_plus), str(200 * num_meetings_15_plus)]]
sessions_by_day_plot = answer_1_table.plot.bar(x='date', y='Num_Of_Sessions', rot=80)
plt.savefig('sessions_by_day_plot.png')
num_of_users = sqldf('select count(*) From answer_3_table')
num_of_users = list(num_of_users['count(*)'])[0]
today = datetime.today().strftime("%b-%d-%Y")
'''----------------------------------Out-Put Results------------------------'''
create_pdf(today = today , max_date=max_date_qu, min_date=min_date_qu, sessions_num=num_of_sessions,
total_cost=total_cost, costs_table=costs_table, num_of_users=num_of_users, avg_users_come=avg_users_come)
writer = pd.ExcelWriter('Zoom Report_{}.xlsx'.format(str(datetime.today()).replace(':', '.', 3)))
(answer_2_table).to_excel(writer , sheet_name='Sessions Number of Participants')
(answer_3_table).to_excel(writer, sheet_name='Participants show-up')
writer.save()
'''---------------------Delete not relevant files------------------------'''
plot1_path = os.path.join(sys.path[0], 'sessions_by_day_plot.png')
os.remove(plot1_path)
exit()
if __name__ == '__main__':
root = Tk()
start_gui(root)
# main()

Interconnection of two dropdowns in plotly-dash

I have been trying to connect two dropdowns to each other i.e. If I select a value from one DropDown the path and the contents of the other DropDown should change accordingly.
e.g I have a folder of dates (06-06-2020, 07-06-2020 and 08-06-2020) so if I select a date which is 07-06-2020 the DropDown below it should display the values which are inside the date folder 07-06-2020.
import dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output
from dash.exceptions import PreventUpdate
import plotly.graph_objects as go
import os
import pandas as pd
os.chdir(r"C:\Users\ankit\Desktop\dash_assign\SessionCode")
dir_route = os.getcwd()
app = dash.Dash(__name__)
g_t = ["OHLC Bars", "Candlesticks"]
# external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']
# csv_list = [files for files in os.listdir(dir_route)]
csv_list = []
date_list = []
path_list = []
for dates in os.listdir(dir_route):
date = dates
date_list.append(date)
app.layout = html.Div(
children=[
html.H1("Time Series Graph"),
dcc.Dropdown(id="select_date",
options=[{"label": dates, "value": str(dates)} for dates in date_list],
value=20180102,
style={
"margin": "10px"
}
),
dcc.Dropdown(id="sym",
placeholder="Select Symbol",
style={
"margin": "10px"
}),
# html.Button("PLOT", id='plot'),
dcc.Graph(id='graph')
]
)
#app.callback(Output('sym', component_property="options"),
[Input('select_date', 'value')])
def update_dates(dat):
lst = os.listdir(os.path.join(dir_route, dat))
for files in lst:
if files.endswith(".csv"):
files.split(" ")
new_file = files[0]
return new_file
#app.callback(Output('graph', 'figure'),
[Input('select_date', 'value'),
Input("sym", "options"),
])
def update_graph(date, symbols):
path = os.path.join(dir_route, date, symbols)
df = pd.read_csv(os.path.join(path+".csv"), index_col=False)
fig = go.Figure(go.Ohlc(x =df["TimeStamp"],
open=df['Open'],
high=df['High'],
low=df['Low'],
close=df['Close'],
increasing_line_color='#00ff00',
decreasing_line_color='#ff0000',
name="Price",
))
return fig
if __name__ == "__main__":
# fig = update_candle("A.csv")
# fig.show()
app.run_server(debug=True, port=5001)
This is my root directory and inside it i have various csv's which should appear in the DropDown once I select the dates
csv files inside one date directory

You can use a callback to update the second dropdown. Below is an example where cascading dropdown is created from table
#app.callback(
Output(component_id='id_first_dropdown', component_property='options'),
[Input(component_id='id_second_dropdown', component_property='value')]
)
def update_dp(filter_value):
sql = "Select distinct second_dropdown_options from table where first_dropdown_value='" + filter_value + "'"
df = pd.read_sql_query(sql, conn)
df = df['second_dropdown_options']
return [{'label': i, 'value': i} for i in df.unique()]

Import and parse .data file

there is a file I tried to import and safe as pandas df. At a first sight looks like it's already columns and rows ordered, but finally I had to do a bunch of stuff to create pandas df. Could you please check if there is much faster way to manage it?
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data'
My way of doing it is:
import requests
import pandas as pd
r = requests.get(url)
file = r.text
step_1 = file.split('\n')
for n in range(len(step_1)): # remove empty strings
if bool(step_1[n]) == False:
del(step_1[n])
step_2 = [i.split('\t') for i in step_1]
cars_names = [i[1] for i in step_2]
step_3 = [i[0].split(' ') for i in step_2]
for e in range(len(step_3)): # remove empty strings in each sublist
step_3[e] = [item for item in step_3[e] if item != '']
mpg = [i[0] for i in step_3]
cylinders = [i[1] for i in step_3]
disp = [i[2] for i in step_3]
horsepower = [i[3] for i in step_3]
weight = [i[4] for i in step_3]
acce = [i[5] for i in step_3]
year = [i[6] for i in step_3]
origin = [i[7] for i in step_3]
list_cols = [cars_names, mpg, cylinders, disp, horsepower, weight, acce, year, origin]
# list_labels written manually:
list_labels = ['car name', 'mpg', 'cylinders', 'displacement', 'horsepower', 'weight', 'acceleration', 'model year', 'origin']
zipped = list(zip(list_labels, list_cols))
data = dict(zipped)
df = pd.DataFrame(data)

When you replaced \t to blankspace, you can use read_csv to read it. But you need to wrap up your text, because the first parameter in read_csv is filepath_or_buffer which needs object with a read() method (such as a file handle or StringIO). Then your question can be transform to read_csv doesn't read the column names correctly on this file?
import requests
import pandas as pd
from io import StringIO
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data'
r = requests.get(url)
file = r.text.replace("\t"," ")
# list_labels written manually:
list_labels = ['mpg', 'cylinders', 'displacement', 'horsepower', 'weight', 'acceleration', 'model year', 'origin','car name']
df = pd.read_csv(StringIO(file),sep="\s+",header = None,names=list_labels)
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
print(df)

Develop Reference

node.js excel linux python-3.x azure haskell apache-spark rust .htaccess string

Uniting two dataframes (pandas) into one excel file and save it using tkinter filedialog - excel

Related

Function to parse a dataframe into a SQL table

Inserting pandas dataframe into django model

failed to execute script python exe

Interconnection of two dropdowns in plotly-dash

Import and parse .data file

Categories

Resources