Power BI with ggplot in python - python-3.x

I have created the code below in Python for Power BI, but it is not showing anything.
# The following code to create a dataframe and remove duplicated rows is always executed and acts as a preamble for your script:
# dataset = pandas.DataFrame(Company, Target)
# dataset = dataset.drop_duplicates()
# Paste or type your script code here:
import matplotlib.pyplot as plt
from plotnine import *
from plotnine.data import mpg
# Bar chart over the 'Company' column of `dataset` (the DataFrame described by
# the preamble comments above).
# NOTE(review): the parenthesised ggplot expression is a bare expression
# statement -- its result is neither assigned, printed, nor drawn. Presumably
# that is why plt.show() has nothing to display; confirm against the plotnine
# documentation for ggplot.draw().
(ggplot(dataset) # data
+ aes(x='Company') # variable
+ geom_bar(size=20)) # type of plot
plt.show()

Related

I am running regression code on student grades and hours studied. The table prints correctly as output, but the scatter plot is not showing.

import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
# Load the CSV file into a DataFrame and echo it
df = pd.read_csv('Grade_Set_1.csv')
print (df)
# Simple scatter plot of 'Test_Grade' against 'Hours_Studied'
# NOTE(review): x= and y= are looked up as column labels of df. Presumably the
# CSV header is spelled differently (case, whitespace, delimiter) -- compare
# against df.columns before plotting.
df.plot(kind='scatter', x='Hours_Studied', y='Test_Grade', title='Grade vs Hours Studied')
# check the correlation between variables
print(df.corr())
I got this KeyError:
KeyError: 'Hours_Studied'

Plotting information from certain Excel spreadsheet

I have an Excel file with various spreadsheets and I want to create a graph from a certain spreadsheet (Details) with plotly.
I use the following code, but the line f = Path.cwd().joinpath('MyFile.xlsm') seems to be the issue because I am using this command incorrectly, and I don't know how to use it correctly.
Thanks.
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import openpyxl as xl
from pathlib import Path
# Get some data
# NOTE(review): no sheet_name is passed here, so presumably pandas reads its
# default sheet rather than 'Details' -- verify against the read_excel docs.
df1 = pd.read_excel('MyFile.xlsm')
# Absolute path to the same workbook, resolved against the current working directory
f = Path.cwd().joinpath('MyFile.xlsm')
# Adding information to the graph
# NOTE(review): `fig` is not assigned anywhere in this snippet (make_subplots is
# imported but never called), and the indentation of the loop body below has
# been lost, so which statements belong to the for/if cannot be told from here.
wb = xl.load_workbook(f)
for sheet in wb.worksheets:
if sheet.title != 'Details':
fig.add_trace(go.Scatter(x=df1['Odo0 [m]'], y=df1['Speed Odo1 [m/s]'], mode='lines', name='Speed Odo1'),
secondary_y=False,
)
#fig.add_trace(
# go.Scatter(x=df['Distance [m]'], y=df['Speed [m/s]'], mode='lines', name='Speed'),
# secondary_y=True,
#)
# Set x-axis title
fig.update_xaxes(title_text="<b>Distance (m)<b>")
# Set y-axes titles
fig.update_yaxes(title_text="<b>Speed (m/s)<b>", secondary_y=False)
# Write the figure to an HTML file (nothing is displayed interactively here)
fig.write_html("DADDB22W0.html")
Ok, I solved it:
df1 = pd.read_excel('MyFile.xlsm', sheet_name='Details')

Python Display NC File Variable Description as Plot Title

I need to use the "description" as my chart or plot title and I cannot find a way to do this in my internet searches. The output from the .nc file variable that has the "description" that I need looks like this:
<class 'netCDF4._netCDF4.Variable'>
float64 M(lat, lon)
_FillValue: nan
long_name: Wind Speed at 100m
description: Anomaly for June 2021 vs the previous 30 years
unlimited dimensions:
current shape = (2920, 7200)
My code looks like this:
# -*- coding: utf-8 -*-
"""
#author: U321103
"""
from sys import exit
import netCDF4 as nc4
from netCDF4 import Dataset
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
#from mpl_toolkits.basemap import Basemap, cm
import datetime
from datetime import datetime
import pandas as pd
import xarray as xr
import bottleneck as bn
import cartopy.crs as ccrs
from mpl_toolkits.basemap import Basemap
import os
os.environ["PROJ_LIB"] = 'C:\\Users\\Yury\\anaconda3\\Library\\share'
# -----------------------------------------------------------------------------------------------------------
# Read variable 'M' and the lat/lon coordinates from a NetCDF file and draw it
# on a Mercator Basemap with grid lines, boundaries and a colorbar.
# -----------------------------------------------------------------------------------------------------------
#%matplotlib inline
#The easiest way to read the data is:
path = "//porfiler03/gtdshare/VORTEX/ANOMALY_FILES/anomaly.M.2021.06.vs30y/world.nc"
# Open the NetCDF file
fh = Dataset(path)
#read variables in fh
# Each variable object is printed in turn.
for var in fh.variables.values():
print(var)
# Get the 100m wind speed
wind100 = fh['M'][:]
#wind100_units = fh['M'].units
# Get the latitude and longitude points
lats = fh.variables['lat'][:]
lons = fh.variables['lon'][:]
# Get some parameters for the Stereographic Projection
# (lon_0 / lat_0 are only referenced by the commented-out 'lcc' Basemap call below)
lon_0 = lons.mean()
lat_0 = lats.mean()
#m = Basemap(width=25000000,height=12000000,
# resolution='l',projection='lcc',\
# lat_ts=50,lat_0=lat_0,lon_0=lon_0)
# Mercator map covering latitudes -40..60 and longitudes -180..180
m = Basemap(projection='merc',llcrnrlat=-40,urcrnrlat=60,\
llcrnrlon=-180,urcrnrlon=180,lat_ts=20,resolution='c')
# help on coordinates: https://matplotlib.org/basemap/users/merc.html
# NOTE(review): the file handle is closed here; presumably any attribute of
# fh['M'] needed later (units, description) has to be read above this line.
fh.close()
# Because our lon and lat variables are 1D,
# use meshgrid to create 2D arrays
# Not necessary if coordinates are already in 2D arrays.
lon, lat = np.meshgrid(lons, lats)
# Convert the lon/lat grid to map projection coordinates
xi, yi = m(lon, lat)
# Plot Data
cs = m.pcolor(xi,yi,np.squeeze(wind100))
# Add Grid Lines
m.drawparallels(np.arange(-80., 81., 40.), labels=[1,0,0,0], fontsize=10)
m.drawmeridians(np.arange(-180., 181., 40.), labels=[0,0,0,1], fontsize=10)
# Add Coastlines, States, and Country Boundaries
m.drawcoastlines()
m.drawstates()
m.drawcountries()
# Add Colorbar
cbar = m.colorbar(cs, location='bottom', pad="10%")
#cbar.set_label(wind100_units)
# Add Title (currently a single-space placeholder string)
plt.title(' ')
plt.show()
# exit is sys.exit, imported at the top of the script
exit()
So, what I need exactly is "Anomaly for June 2021 vs the previous 30 years" to add to the plot below in the line with plt.title() - thank you!
You should add this line of code wind100_description = fh['M'].description somewhere before fh.close(). Then simply do plt.title(wind100_description) instead of plt.title(' '). Also, it's a good practice to remove the imports you don't need, of which you have quite a few :)

Programming a simple Python Stock Service. How can I program this to only show the graph for only a few seconds?

This is the current program, which runs with no errors at all...
from alpha_vantage.timeseries import TimeSeries
from alpha_vantage.techindicators import TechIndicators
from matplotlib.pyplot import figure
import matplotlib.pyplot as plt
# Alpha Vantage API key (replace with your own).
api_key = 'W01B6S3ALTS82VRF'
ticker = 'AAPL'

# Two API clients: the time-series client is asked for pandas output, the
# indicator client is left at its default output format.
series_client = TimeSeries(api_key, output_format='pandas')
indicator_client = TechIndicators(api_key)

# Each request returns a (data, metadata) tuple.
daily_prices, daily_meta = series_client.get_daily(symbol=ticker)
sma_values, sma_meta = indicator_client.get_sma(symbol=ticker)

# Visualization: daily closing price on a 15x6 inch, 80 dpi figure.
canvas_options = {
    'num': None,
    'figsize': (15, 6),
    'dpi': 80,
    'facecolor': 'w',
    'edgecolor': 'k',
}
figure(**canvas_options)
closing_prices = daily_prices['4. close']
closing_prices.plot()
plt.tight_layout()
plt.grid()
plt.show()
I want it to hide the graph after a few seconds. Is this possible?
You can auto-close the PyPlot figure by adjusting the last line here and adding two more lines. Block, which is set to True by default, stops the python script from continuing to run until the figure is manually closed. By setting block to false, your code will continue to run and you can complete other tasks such as closing the figure or replacing it with a different plot.
# Show the figure without blocking, keep it up for the delay, then close it.
display_seconds = 4
plt.show(block=False)
plt.pause(display_seconds)
plt.close()
This will close the figure after 4 seconds.

Sum of residuals of scipy regression model

I am going through a stats workbook with Python, and there is a hands-on practice question on which I am stuck. It's related to Poisson regression, and here is the problem statement:
Perform the following tasks:
Load the R data set Insurance from MASS package and Capture the data as pandas data frame
Build a Poisson regression model with a log of an
independent variable, Holders and dependent variable Claims.
Fit the model with data.
Find the sum of residuals.
I am stuck with point 4 above. Can anyone help with this step?
Here is what i have done so far :-
import statsmodels.api as sm
import statsmodels.formula.api as smf
import numpy as np
# Fetch the R 'Insurance' dataset from the MASS package (cache disabled) and keep its .data table
df = sm.datasets.get_rdataset('Insurance', package='MASS', cache=False).data
# NOTE(review): in a formula the term left of '~' is the response. As written,
# np.log(Holders) is the response and Claims the regressor ('-1' presumably
# drops the intercept), whereas the exercise statement names Claims as the
# dependent variable.
poisson_model = smf.poisson('np.log(Holders) ~ -1 + Claims', df)
# Fit the model and print the fit summary
poisson_result = poisson_model.fit()
print(poisson_result.summary())
Now how to get sum of residuals?
np.sum(poisson_result.resid)
works fine
You have used the wrong variables to build the poisson model as pointed out by Karthikeyan.
Use this instead,
poisson_model = smf.poisson('Claims ~ np.log(Holders)',df)
Try below code for Fresco play
import statsmodels.api as sm
import statsmodels.formula.api as smf
import pandas as pd
import numpy as np
# Pull the Insurance table out of the R "MASS" package.
insurance_frame = sm.datasets.get_rdataset("Insurance", "MASS").data
# Poisson regression of Claims on log(Holders), fitted immediately.
claims_formula = 'Claims ~ np.log(Holders)'
fitted_model = smf.poisson(claims_formula, insurance_frame).fit()
# Print the cumulative sum of the residuals.
residual_running_total = np.cumsum(fitted_model.resid)
print(residual_running_total)
1.a) Load the R data set Insurance from MASS package
1.b) and Capture the data as pandas data frame
2) Build a Poisson regression model with a log of an independent variable, Holders and dependent variable Claims.
3) Fit the model with data.
4) Find the sum of residuals.
import statsmodels.api as sm
import statsmodels.formula.api as smf
import pandas as pd
import numpy as np
# Load the R data set Insurance from the MASS package.
ins = sm.datasets.get_rdataset('Insurance', 'MASS').data
# Capture the data as a pandas data frame.
ins_pd = pd.DataFrame(ins)
# Build a Poisson regression model with the log of the independent variable
# "Holders" and the dependent variable "Claims", then fit it with the data.
# The model is fed the captured frame (ins_pd) so that the capture step is
# actually used rather than being dead code.
result = smf.poisson('Claims ~ np.log(Holders)', data=ins_pd).fit()
# Equivalent two-step form:
# model = smf.poisson('Claims ~ np.log(Holders)', data=ins_pd)
# result = model.fit()
# Find the sum of the residuals.
print('Sum of the residuals:', np.sum(result.resid))
I'm new to this, so I don't know whether capturing the data as a pandas DataFrame this way is fine or not, but let me know.
greetings
Fresco Mex
import statsmodels.api as sm
import statsmodels.formula.api as smf
import pandas as pd
import numpy as np
# Load the Insurance table from the R "MASS" package.
df_data = sm.datasets.get_rdataset("Insurance", "MASS").data
# Capture it as a pandas DataFrame; this frame is what the model is built on
# (previously it was created and then never used).
df_dataf = pd.DataFrame(df_data)
# Poisson regression: Claims is the response, log(Holders) the regressor.
insurance_model = smf.poisson('Claims ~ np.log(Holders)', df_dataf)
insurance_model_result = insurance_model.fit()
# Print the sum of the residuals of the fitted model.
print(np.sum(insurance_model_result.resid))
In the poisson_model = smf.poisson('np.log(Holders) ~ -1 + Claims', df) statement, the dependent variable "Claims" should be on the left-hand side of the formula:
poisson_model = smf.poisson('Claims ~ np.log(Holders)-1 ', df)
this qualified in "Fresco" if anyone is looking for the solution
# Fetch the Insurance dataset bundle from R's MASS package and take its table.
dataset_bundle = sm.datasets.get_rdataset("Insurance", "MASS")
claims_table = dataset_bundle.data
# Build and fit the Poisson model of Claims against log(Holders) in one chain.
fit_result = smf.poisson('Claims ~ np.log(Holders)', claims_table).fit()
# Add up the residuals and print the total.
residual_total = np.sum(fit_result.resid)
print(residual_total)
I don't know whether it will work or not, but I referred to these docs:
https://vincentarelbundock.github.io/Rdatasets/doc/MASS/Insurance.html
https://vincentarelbundock.github.io/Rdatasets/datasets.html
So I hope this will work too.
import statsmodels.api as sm
import statsmodels.formula.api as smf
import numpy as np
import pandas as pd
# Fetch the Insurance dataset from R's MASS package with cache=True and wrap
# its table in a DataFrame.
raw_table = sm.datasets.get_rdataset("Insurance", "MASS", cache=True).data
data = pd.DataFrame(raw_table)
# Poisson regression of Claims on District, Group, Age and log(Holders).
formula = 'Claims ~ District + Group + Age + np.log(Holders)'
model = smf.poisson(formula, data).fit()
# Print the sum of the fitted model's residuals.
residual_total = np.sum(model.resid)
print(residual_total)
Try np.cumsum(model.resid) for this question.
Ideally np.sum(model.resid) should be the right answer for the question... But if the system is not accepting it, try the cumsum

Resources