Callback error updating output-link.href - Parsing and downloading CSV file - python-3.x

Aim and objective: I am a beginner to web development, and I'm using Plotly Dash to let a user upload a CSV file, run a script that modifies the file, and let the user download the modified CSV file.
Tools employed: I am using dash_core_components to achieve this: dcc.Upload lets the user upload a file, and html.A provides a link to the modified CSV file so the user can download it. All modifications to the CSV file are performed with Pandas.
Issue: When I try to return the href for the modified file, I run into a callback error.
Code description: For reference, in the code below the function interval_cleaner reads the CSV file, identifies missing data, and fills the gaps. interval_cleaner takes the contents property of dcc.Upload as input, decodes it, and returns a DataFrame. In the callback function update_output_parser, I call interval_cleaner, which returns a Pandas DataFrame (clean_dat); this is converted to CSV text, which I try to return as the href property of html.A.
Data: I have 21,886 rows in the original file, but the output has around 17,520 rows (presumably because 17,520 is exactly one year of 30-minute intervals, 365 × 48, which is the fixed index the cleaning function builds). A sample of the data:
df = pd.DataFrame({'Date': ['7/1/2019 0:30', '7/1/2019 1:00', '7/1/2019 1:30', '7/1/2019 2:00'],
                   'Demand': [60.48, 52.92, 49.32, 53.28]})
import pandas as pd
import datetime
import dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output, State
import urllib.parse
import base64
import io
app = dash.Dash()

app.layout = html.Div([
    html.H3('Unit of the data'),
    dcc.RadioItems(
        id='units',
        options=[{'label': 'kWh', 'value': 'kwh'},
                 {'label': 'kW', 'value': 'kw'}],
        value='kwh',
        labelStyle={'display': 'inline-block'}
    ),
    html.H3('Interval of load data'),
    dcc.RadioItems(
        id='intervals',
        options=[{'label': '15 minutes', 'value': 15},
                 {'label': '30 minutes', 'value': 30},
                 {'label': 'Hourly', 'value': 60}],
        value=60,
        labelStyle={'display': 'inline-block'}
    ),
    html.H3('Convert into other intervals'),
    dcc.RadioItems(
        id='conversion',
        options=[{'label': 'Yes', 'value': 'y'},
                 {'label': 'No', 'value': 'n'}],
        value='n',
        labelStyle={'display': 'inline-block'}
    ),
    dcc.Upload(
        id='upload-data',
        children=html.Div([
            'Drag and Drop or ',
            html.A('Select Files')
        ]),
        style={
            'width': '100%',
            'height': '60px',
            'lineHeight': '60px',
            'borderWidth': '1px',
            'borderStyle': 'dashed',
            'borderRadius': '5px',
            'textAlign': 'center',
            'margin': '10px'
        },
        # Allow multiple files to be uploaded
        multiple=True
    ),
    html.Div(id='Filename'),
    html.A('Download cleaned load', id='output-link',
           download='cleaned_load_data.csv', target='_blank'),
])
def interval_cleaner(csv_file_content, unit, frequency):
    # Decode the base64 contents string supplied by dcc.Upload
    content_type, content_string = csv_file_content.split(',')
    decoded = base64.b64decode(content_string)
    df = pd.read_csv(io.StringIO(decoded.decode('utf-8')))
    df['Date'] = df['Date'].astype('datetime64[ns]')
    df = df.set_index('Date').sort_index()
    # Readings per hour at the selected interval
    if frequency == 15:
        mult = 4
    elif frequency == 30:
        mult = 2
    else:
        mult = 1
    # Build a complete one-year timestamp index at the requested frequency
    cleaned_load = pd.DataFrame(
        pd.date_range(start=df.index[0],
                      end=df.index[0] + pd.to_timedelta('364 days 23:59:00'),
                      freq=str(frequency) + 'T'),
        columns=['Date'])  # 365 days changed to 364 days
    leap = len(cleaned_load[(cleaned_load['Date'].dt.month == 2) & (cleaned_load['Date'].dt.day == 29)])
    if leap != 0:
        # Drop Feb 29 and append one extra day to keep a full year of rows
        cleaned_load = cleaned_load[~((cleaned_load['Date'].dt.month == 2) & (cleaned_load['Date'].dt.day == 29))]
        last = cleaned_load.iloc[len(cleaned_load) - 1, 0]
        if frequency == 60:
            cleaned_load = cleaned_load.append(
                pd.DataFrame(pd.date_range(last + pd.to_timedelta('01:00:00'),
                                           last + pd.to_timedelta('1 day'),
                                           freq=str(frequency) + 'T'),
                             columns=['Date']),
                ignore_index=True)
        else:
            # pd.DateOffset(minute=...) would *set* the minute field;
            # minutes=... adds an offset of that many minutes
            cleaned_load = cleaned_load.append(
                pd.DataFrame(pd.date_range(last + pd.DateOffset(minutes=frequency),
                                           last + pd.to_timedelta('1 day'),
                                           freq=str(frequency) + 'T'),
                             columns=['Date']),
                ignore_index=True)
    cleaned_load = cleaned_load.set_index('Date')
    cleaned_load = cleaned_load.join(df, how='left')
    cleaned_load = cleaned_load[~cleaned_load.index.duplicated(keep='last')].reset_index()
    # print(cleaned_load.head())
    cleaned_load['MDH'] = cleaned_load['Date'].dt.strftime('%m-%d %H:%M')
    nans = cleaned_load[cleaned_load['Demand'].isnull()].index
    # print(nans)
    # print(cleaned_load.iloc[nans, 0])
    # Fill each gap with the mean of the same time of day over a week
    for item in nans:
        if item < 24 * mult * 7:
            ind = cleaned_load.iloc[item:len(cleaned_load) + item:24 * mult, 1].first_valid_index()  # 70 day changed to len()
            cleaned_load.iloc[item, 1] = cleaned_load.iloc[ind:ind + 24 * mult * 7:24 * mult, 1].mean(skipna=True)
        else:
            cleaned_load.iloc[item, 1] = cleaned_load.iloc[item - 24 * mult * 7:item:24 * mult, 1].mean(skipna=True)
    cleaned_load = cleaned_load.sort_values(by='MDH')
    return cleaned_load
@app.callback(Output('output-link', 'href'),
              [Input('upload-data', 'contents')],
              [State('units', 'value'),
               State('intervals', 'value')])
def update_output_parser(file_contents, unit, frequency):
    if not file_contents:
        return ''
    # dcc.Upload was created with multiple=True, so contents arrives as a list
    if isinstance(file_contents, list):
        file_contents = file_contents[0]
    clean_dat = interval_cleaner(file_contents, unit, frequency)
    dat_csv = clean_dat.to_csv(encoding='utf-8')
    # Python 3 moved quote() into urllib.parse (urllib.quote is Python 2)
    dat_csv = "data:text/csv;charset=utf-8," + urllib.parse.quote(dat_csv)
    return dat_csv
if __name__ == '__main__':
    app.run_server(debug=True)
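As an aside, newer Dash releases bundle a download component that avoids hand-building a data URI. A minimal sketch of that alternative, assuming Dash 2.x (the component ids are illustrative, and the hard-coded unit/interval passed to interval_cleaner is just for brevity):

# Sketch only: the dcc.Download approach available in Dash >= 2.0
from dash import Dash, dcc, html, Input, Output, State
from dash.exceptions import PreventUpdate

app = Dash(__name__)
app.layout = html.Div([
    dcc.Upload(id='upload-data', children=html.Button('Upload CSV')),
    html.Button('Download cleaned load', id='download-btn'),
    dcc.Download(id='download-csv'),
])

@app.callback(Output('download-csv', 'data'),
              Input('download-btn', 'n_clicks'),
              State('upload-data', 'contents'),
              prevent_initial_call=True)
def download_cleaned(n_clicks, contents):
    if contents is None:
        raise PreventUpdate
    # Reuse the cleaner defined above; unit/interval hard-coded for brevity
    clean_dat = interval_cleaner(contents, 'kwh', 30)
    # send_data_frame serialises the DataFrame and triggers a browser download
    return dcc.send_data_frame(clean_dat.to_csv, 'cleaned_load_data.csv')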

Related

Return Excel file from Azure Function via HTTP using Python

Use Case
Within a Logic App, I create some data using an Azure Function with a Pandas DataFrame. After employing the Azure Function, I want to further process the data in .xlsx format within the Logic App. Therefore I need the Azure Function to return an .xlsx file.
Problem
I am unable to format the HTTPResponse of my Azure Function so that I can further process the .xlsx file within the Logic App. Basically I require the correct conversion from my Pandas DataFrame to the HTTPResponse.
What should I do in convert_to_xlsx() (see below) to achieve the desired output?
Toy Example
import azure.functions as func
import logging
import numpy as np
import pandas as pd
from openpyxl import Workbook
from openpyxl.utils.dataframe import dataframe_to_rows

def main(req: func.HttpRequest) -> func.HttpResponse:
    df = pd.DataFrame(np.random.randint(0, 100, size=(2, 4)), columns=list('ABCD'))
    excel = convert_to_xlsx(df)
    return func.HttpResponse(excel, status_code=200)

def convert_to_xlsx(df):
    # Create excel representation
    wb = Workbook()
    sheet = wb.active
    for row in dataframe_to_rows(df, index=False, header=True):
        sheet.append(row)
    logging.info('sheet: ' + str(list(sheet.values)))  # So far, so good.
    # Convert for HTTPResponse
    res = do_something(sheet)  # <---- What to do here?
    return res
What I tried
I tried converting the data to openpyxl's Workbook, which worked fine, but I did not know how to proceed from there to convert onward from the Workbook.
Also, there is this answer using xlrd, which I could not get to work for my use case; additionally, xlrd does not support .xlsx anymore. Based on that post, I tried the following, which did not work as intended:
def convert_to_xlsx(df):
    # ... see above
    # Only returns column names without values.
    # Also, apparently not the right format?
    return f'{[row for row in sheet]}'
One option might be to return some kind of JSON response and then convert it back to an excel file within the logic app. But I hoped that I might be able to skip that and immediately return a .xlsx file from the function as HTTP payload.
In order to obtain an Excel file you also have to manipulate the header; see https://stackoverflow.com/a/67276395/7641854.
Without the changed header you will obtain a zip object, I assume.
Thus a working example that returns an Excel file via an Azure Function looks like this:
import io
import pandas as pd
import azure.functions as func

def main(req: func.HttpRequest) -> func.HttpResponse:
    d = {'col1': [1, 2], 'col2': [3, 4]}
    df = pd.DataFrame(data=d)
    buffer = io.BytesIO()
    df.to_excel(buffer, index=False)  # writes into the buffer; to_excel returns None
    return func.HttpResponse(
        buffer.getvalue(),
        headers={"Content-Disposition": 'attachment; filename="test.xlsx"'},
        mimetype='application/vnd.ms-excel',
        status_code=200,
    )
The approach could be to write the output to a buffer and return the buffer's contents within the HTTPResponse:
...
from io import BytesIO

def main(req: func.HttpRequest) -> func.HttpResponse:
    df = pd.DataFrame(np.random.randint(0, 100, size=(2, 4)), columns=list('ABCD'))
    buffer = BytesIO()
    df.to_excel(buffer)  # write the frame into the in-memory buffer
    return func.HttpResponse(buffer.getvalue(), status_code=200)
However, due to concerns regarding file size and execution times when returning large files via HTTP, I opted to upload the resulting Excel file to Azure Blob Storage instead, using something like this (snippet):
...
out_blob = BlobClient.from_connection_string(...)
df.to_excel(buffer)  # buffer: a BytesIO as above
out_blob.upload_blob(buffer.getvalue())
...

How do I read a request.FILES into DataSource in Geodjango

So, the goal is to create a webpage where a .shp file can be uploaded, which returns a summary of some calculations as a JsonResponse. I have prepared the calculations and everything works nicely when I add a manual path to the file in question. However, the goal is for someone else to be able to upload the data and get back the response, so I can't hardcode my path.
The overall approach:
Read the file in through forms.FileField() and request.FILES['file_name']. After this, I need to pass this request.FILES object to DataSource in order to read it. I would rather not save the file to disk if possible, but work directly from memory.
forms.py
from django import forms
from django.core.files.storage import FileSystemStorage

class UploadFileForm(forms.Form):
    # title = forms.CharField(max_length=50)
    file = forms.FileField()
views.py
import json
import os
from django.http import Http404, HttpResponse, HttpResponseRedirect
from django.shortcuts import render
from django.template import loader
from django.contrib import messages
from django.views.generic import TemplateView
from django.http import JsonResponse
from django.conf import settings
from .forms import UploadFileForm
from . import models
from django.shortcuts import redirect
from gisapp.functions.functions import handle_uploaded_file, handle_uploaded_file_two
from django.contrib.gis.gdal import DataSource
from django.core.files.uploadedfile import UploadedFile, TemporaryUploadedFile
import geopandas as gpd
import fiona

def upload_file(request):
    if request.method == 'POST':
        form = UploadFileForm(request.POST, request.FILES)
        if form.is_valid():
            f = request.FILES['file']
            # geo2 = gpd.read_file(f)
            # print(geo2)
            # f_path = os.path.abspath(os.path.join(os.path.dirname(f), f))
            # f_path = TemporaryUploadedFile.temporary_file_path(UploadedFile(f))
            # print(f_path)
            # f_path = f.temporary_file_path()
            # new_path = request.FILES['file'].temporary_file_path
            # print(f'This is file path: {f_path}')
            # print(f'This is file path: {new_path}')
            # data = DataSource(f'gisapp/data/{f}')  # given an absolute path it works great
            data = DataSource(f)  # constantly failing
            # data = DataSource(new_path)
            # print(f'This is file path: {f_path}')
            layer = data[0]
            if layer.geom_type.name == "Polygon" or layer.geom_type.name == "LineString":
                handle_uploaded_file(request.FILES['file'])
            elif layer.geom_type.name == "Point":
                handle_uploaded_file_two(request.FILES['file'])
            return JsonResponse({"Count": f"{handle_uploaded_file_two(request.FILES['file'])[0]}", "Bounding Box": f"{handle_uploaded_file_two(request.FILES['file'])[1]}"})
            # return JsonResponse({"Count": f"{handle_uploaded_file(request.FILES['file'])[0]}", "Minimum": f"{handle_uploaded_file(request.FILES['file'])[1]}", "Maximum": f"{handle_uploaded_file(request.FILES['file'])[1]}"})
            # instance = models.GeometryUpload(file_field=request.FILES['file'])
            # instance.save()
            # # return HttpResponseRedirect('/success/')
    else:
        form = UploadFileForm()
    return render(request, 'upload.html', {'form': form})
Error I get:
django.contrib.gis.gdal.error.GDALException: Invalid data source input type: <class 'django.core.files.uploadedfile.InMemoryUploadedFile'>
Now, as you can see from upload_file() in views.py, I tried a multitude of approaches; when I pass an absolute path it works, but otherwise I can't seem to get the uploaded file into DataSource so that I can use it in my later analysis.
Looking at how Django handles this, it doesn't appear possible to work off an in-memory file: the path to the file is passed to the C API for OGR, which then handles opening the file and reading it in.
A possible solution that I am trying myself is to have the user zip their shapefile components (.shp, .shx, .dbf, etc.) beforehand. The zip file is then uploaded and unzipped; the .shp file can then be read. Hope this helps.
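A rough sketch of that zip-based idea (the helper name and the temp-directory handling here are illustrative, not from the original answer):

import os
import tempfile
import zipfile
from django.contrib.gis.gdal import DataSource

def handle_zipped_shapefile(uploaded_zip):
    # Unpack the uploaded .zip to a temporary directory, then open the
    # contained .shp by path, since OGR needs a real filesystem path
    with tempfile.TemporaryDirectory() as tmpdir:
        with zipfile.ZipFile(uploaded_zip) as zf:
            zf.extractall(tmpdir)
        shp_path = next(
            os.path.join(tmpdir, name)
            for name in os.listdir(tmpdir)
            if name.endswith('.shp')
        )
        ds = DataSource(shp_path)
        return ds[0].geom_type.name  # e.g. "Polygon"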
I faced the same problem, and my workaround was to save the file uploaded by the user to a temporary folder, then pass the absolute path of the temporary file to my DataSource. After finishing all my processing with the temporary file, I deleted it.
The downside of this method is the execution time: it is slow.
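A minimal sketch of that temp-file workaround (the helper name is illustrative, and it assumes the upload is a single file OGR can open on its own):

import os
import tempfile
from django.contrib.gis.gdal import DataSource

def datasource_from_upload(uploaded_file):
    # Persist the in-memory upload to disk so OGR can open it by path
    suffix = os.path.splitext(uploaded_file.name)[1]
    with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
        for chunk in uploaded_file.chunks():
            tmp.write(chunk)
        tmp_path = tmp.name
    try:
        ds = DataSource(tmp_path)
        return ds[0].num_feat  # e.g. the layer's feature count
    finally:
        os.remove(tmp_path)  # clean up once processing is done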

Send xlsx file using SES in AWS lambda function

SITUATION
I have created a Lambda function whose output is an Excel file that gets saved to an S3 bucket. This part of the function works as expected.
As part of the function's operation, I would also like to be able to email the generated Excel file to selected recipients.
CODE:
# IMPORT MODULES
import boto3
import pandas as pd
import io
from io import BytesIO
from io import StringIO
from datetime import date
import email
import email.header
import email.policy
from email.mime.text import MIMEText
from email.mime.application import MIMEApplication
from email.mime.multipart import MIMEMultipart

def lambda_handler(event, context):
    # GENERATE CURRENT DATE TO APPEND TO FILE
    today = date.today()
    date_val = today.strftime("%B %d, %Y")
    # CREATE DATAFRAME
    df = pd.DataFrame({'Data': [10, 22, 31, 43, 57, 99, 65, 74, 88]})
    # EVALUATE VARIABLES AS ODD OR EVEN INTEGERS
    even = df.loc[df['Data'] % 2 == 0]
    odd = df.loc[df['Data'] % 2 != 0]
    # SPECIFY BUCKET NAME AND OUTPUT FILE PATH
    bucket = 'my-bucket'
    filepath = 'output/My_Excel_File_{}.xlsx'.format(date_val)
    # EXPORT MULTI-SHEET EXCEL FILE AND SEND TO S3 BUCKET
    with io.BytesIO() as output:
        with pd.ExcelWriter(output, engine='xlsxwriter') as writer:
            even.to_excel(writer, sheet_name='Even')
            odd.to_excel(writer, sheet_name='Odd')
        data = output.getvalue()
    s3 = boto3.resource('s3')
    s3.Bucket(bucket).put_object(Key=filepath, Body=data)
WHAT I HAVE TRIED
I have tried to achieve my aim by appending the following code to my function, referring to various documentation; however, this does not achieve the desired result.
# BUILD EMAIL MESSAGE WITH ATTACHMENT AND SEND VIA SES
message = MIMEMultipart()
message['Subject'] = 'Email subject'
message['From'] = 'sender.email@domain.com'
message['To'] = 'recipient.email@domain.com'
# MESSAGE BODY
part = MIMEText('This is the email body string', 'html')
message.attach(part)
# ATTACHMENT
if attachment_string:  # if bytestring available
    part = MIMEApplication(str.encode(attachment_string))
else:  # if file provided
    s3_client = boto3.client('s3')  # get_object lives on the client, not the resource
    obj = s3_client.get_object(Bucket='my-bucket', Key='My_Excel_File_{}.xlsx'.format(date_val))
    part = MIMEApplication(obj['Body'].read())
part.add_header('Content-Disposition', 'attachment', filename='My_Excel_File_{}.xlsx'.format(date_val))
message.attach(part)
client = boto3.client('ses')  # SES client (not defined in the original snippet)
response = client.send_raw_email(
    Source=message['From'],
    Destinations=['recipient.email@domain.com'],
    RawMessage={
        'Data': message.as_string()
    }
)
There are AWS examples that dynamically create Excel documents and email them. In this use case, they are implemented in Java and the app is a web app. See this:
Creating the DynamoDB web application item tracker
Although this example uses the AWS SDK for Java V2, it will give you an idea, and hopefully you can port it to the programming language you are using.
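A minimal Python sketch of the same idea, attaching the workbook straight from the in-memory buffer instead of re-reading it from S3 (the helper name and the sender/recipient values are placeholders, and SES must already have the sender address verified):

import io
import boto3
import pandas as pd
from email.mime.application import MIMEApplication
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText

def send_excel_via_ses(df, filename, sender, recipient):
    # Write the DataFrame to an in-memory .xlsx buffer
    buffer = io.BytesIO()
    with pd.ExcelWriter(buffer, engine='xlsxwriter') as writer:
        df.to_excel(writer, sheet_name='Sheet1')
    # Assemble the MIME message with the buffer as attachment
    message = MIMEMultipart()
    message['Subject'] = 'Generated Excel file'
    message['From'] = sender
    message['To'] = recipient
    message.attach(MIMEText('Report attached.', 'html'))
    part = MIMEApplication(buffer.getvalue())
    part.add_header('Content-Disposition', 'attachment', filename=filename)
    message.attach(part)
    # Hand the raw message to SES
    ses = boto3.client('ses')
    return ses.send_raw_email(
        Source=sender,
        Destinations=[recipient],
        RawMessage={'Data': message.as_string()},
    )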

How to run python function by clicking html button?

I am trying to make this web app work, but I am getting an error. These are the steps the web app is supposed to handle:
import a file
run the python script
export the results
When I run the Python script independently (without involving Flask), it works fine (I use a Jupyter notebook); on the other hand, when I run it with Flask (from the prompt) I get an error:
File "app.py", line 88, in <module>
    for name, df in transformed_dict.items():
NameError: name 'transformed_dict' is not defined
Any idea how I can make this web app work?
This is my first time using Flask, and I would appreciate any suggestions or guidance.
python file & html file
from flask import Flask, render_template, request, send_file
from flask_sqlalchemy import SQLAlchemy
import os
import pandas as pd
from openpyxl import load_workbook
import sqlalchemy as db

def transform(df):
    # Some data processing here
    return df

app = Flask(__name__)

@app.route('/')
def index():
    return render_template('firstpage.html')

@app.route('/upload', methods=['GET', 'POST'])
def upload():
    file = request.files['inputfile']
    xls = pd.ExcelFile(file)
    name_dict = {}
    snames = xls.sheet_names
    for sn in snames:
        name_dict[sn] = xls.parse(sn)
    for key, value in name_dict.items():
        transform(value)
    transformed_dict = {}
    for key, value in name_dict.items():
        transformed_dict[key] = transform(value)

#### write to excel example:
writer = pd.ExcelWriter("MyData.xlsx", engine='xlsxwriter')
for name, df in transformed_dict.items():
    df.to_excel(writer, sheet_name=name)
writer.save()

if __name__ == '__main__':
    app.run(port=5000)
Your block:
#### write to excel example:
writer = pd.ExcelWriter("MyData.xlsx", engine='xlsxwriter')
for name, df in transformed_dict.items():
    df.to_excel(writer, sheet_name=name)
writer.save()
should be part of your upload() function, since that's where you define and fill transformed_dict; you just need to match its indentation to the block above it (see the sketch below).
The current error comes up because Python tries to run that block as soon as you start your script, and transformed_dict doesn't exist at that point.
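A sketch of how the end of upload() might look after the move (returning the workbook with send_file is an assumption about the intended behaviour, not something stated in the question):

from flask import send_file

@app.route('/upload', methods=['GET', 'POST'])
def upload():
    transformed_dict = {}  # ... built from the uploaded workbook as above ...

    # The writer block now runs inside the function, where transformed_dict exists
    writer = pd.ExcelWriter("MyData.xlsx", engine='xlsxwriter')
    for name, df in transformed_dict.items():
        df.to_excel(writer, sheet_name=name)
    writer.save()

    # Assumed intent: return the generated workbook to the client
    return send_file("MyData.xlsx", as_attachment=True)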

Dash python plotly live update table

I am new to Plotly Dash. I want to draw a table whose values (rows) will automatically be updated after a certain interval of time, but I do not know how to use dash_table_experiments. The table is already saved as a CSV file, but I am somehow unable to make it live.
Please help!
Can someone guide me in the right direction on what I should do?
Your help will be highly appreciated. Following is the code.
import dash
import pandas as pd
from pandas import Series, DataFrame
from dash.dependencies import Input, Output, Event
import dash_core_components as dcc
import dash_html_components as html
import dash_table_experiments as dtable

app = dash.Dash()

def TP_Sort():
    address = 'E:/Dats Science/POWER BI LAB DATA/PS CORE KPIS/Excel Sheets/Throughput.xlsx'
    TP = pd.read_excel(address)
    TP1 = TP.head()
    TP1.to_csv('TP1.csv', index=False)  # was Current_Interval.to_csv, which is undefined
    return TP1

app.layout = html.Div([
    html.H1('Data Throughput Dashboard-NOC NPM Core'),
    dcc.Interval(id='graph-update', interval=240000),
    dtable.DataTable(id='my-table',
                     rows=[{}],
                     row_selectable=False,
                     filterable=True,
                     sortable=False,
                     editable=False)
])

@app.callback(
    dash.dependencies.Output('my-table', 'row_update'),
    events=[dash.dependencies.Event('graph-update', 'interval')])
def update_table(maxrows=4):
    TP_Sort()
    TP_Table1 = 'C:/Users/muzamal.pervez/Desktop/Python Scripts/TP1.csv'
    TP_Table2 = pd.read_csv(TP_Table1)
    return TP_Table2.to_dict('records')

if __name__ == '__main__':
    app.run_server(debug=False)
I am trying the above approach. Please correct me where I am wrong, as the output is an error loading dependencies.
BR
Rana
Your callback is wrong.
It should be:
@app.callback(Output('my-table', 'rows'), [Input('graph-update', 'n_intervals')])
def update_table(n, maxrows=4):
    # We're now in interval *n*
    # Your code
    return TP_Table2.to_dict('records')
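For n_intervals to fire, the Interval component in the layout needs that property as well; a small sketch of the matching layout line (assuming a Dash version where Event has been removed in favour of n_intervals):

# The counter starts at 0 and increments every 240000 ms,
# triggering the callback above on each tick
dcc.Interval(id='graph-update', interval=240000, n_intervals=0)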
