I have a dataset with 1915 rows that contains an entry-date column and a quantity column, e.g.:
10/22/2018 qty:1
10/22/2018 qty:3
11/22/2017 qty:1
Is it possible to edit the code below so that each date is weighted by the qty associated with it, instead of just being counted once? I've been fiddling around with where to put the multiplier but am stumped. This is the code I have running so far (without the multiplier).
import pandas as pd
import matplotlib.pyplot as plt; plt.rcdefaults()
import matplotlib.dates as mdates
import numpy as np
import matplotlib.pyplot as plt
# Load the purchase-order export; ENTRY_DATE is expected to hold date strings
# such as "10/22/2018".
quotes = pd.read_csv("PO25474.csv")
# pd.to_datetime instead of astype("datetime64"): pandas >= 2.0 refuses casts
# to the unit-less "datetime64" dtype.
quotes["ENTRY_DATE"] = pd.to_datetime(quotes["ENTRY_DATE"])
plt.figure(figsize=(20, 10))
# Count the rows that fall into each (year, month) bucket; one bar per bucket.
entry_date = quotes["ENTRY_DATE"]
monthly_counts = entry_date.groupby(
    [entry_date.dt.year, entry_date.dt.month]
).count()
ax = monthly_counts.plot(kind="bar")
ax.set_xlabel("quotes by month")
ax.set_ylabel("count")
ax.set_title("title")
plt.show()
I solved it. I had to select the QTY column after the groupby and replace count() with sum().
import pandas as pd
import matplotlib.pyplot as plt; plt.rcdefaults()
import matplotlib.dates as mdates
import numpy as np
# Load the purchase-order export; it must provide ENTRY_DATE and QTY columns.
quotes = pd.read_csv("PO25474.csv")
# pd.to_datetime instead of astype("datetime64"): pandas >= 2.0 refuses casts
# to the unit-less "datetime64" dtype.
quotes["ENTRY_DATE"] = pd.to_datetime(quotes["ENTRY_DATE"])
plt.figure(figsize=(12, 5))
# Sum QTY per (year, month) bucket, so a row with qty 3 weighs three times as
# much as a row with qty 1 (a plain count() would treat them the same).
entry_date = quotes["ENTRY_DATE"]
monthly_qty = quotes.groupby(
    [entry_date.dt.year, entry_date.dt.month]
)["QTY"].sum()
ax = monthly_qty.plot(kind="bar")
ax.set_xlabel("quotes by month")
ax.set_ylabel("count")
ax.set_title("PO25474")
plt.show()
Related
I have three columns of data. They are too large to generate meshgrids from, so in order to generate a surface plot from the data I use a method like this:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import cm
from mpl_toolkits.mplot3d import Axes3D
# Load three columns of scattered (x, y, z) samples, one array per column.
x, y, z = np.loadtxt('data_file', unpack=True)
df = pd.DataFrame({'x': x, 'y': y, 'z': z})
fig = plt.figure()
# Create the 3D axes through the figure. A bare Axes3D(fig) is deprecated
# since Matplotlib 3.4 and is no longer attached to the figure automatically
# in recent releases, which leaves the window empty.
ax = fig.add_subplot(111, projection='3d')
# plot_trisurf triangulates the unstructured points itself, so no meshgrid
# has to be built.
surf = ax.plot_trisurf(df.x, df.y, df.z, cmap=cm.jet, linewidth=0.05)
fig.colorbar(surf, shrink=0.5, aspect=5)
plt.show()
Is there a similar alternative to plot_trisurf for contours?
I tried to plot the energies of 4 nodes as a line graph, but I'm not able to identify which line represents which node ID (1, 2, 3 or 4).
My csv looks something like this :
Time,Source,Destination,Bits,Energy
0,1,2,288,9.9999856
1058,1,2,288,9.9999856
1140,2,1,96,9.9999808
1958,2,3,96,9.9999952
2024,2,1,96,9.9999808
2051,2,3,288,9.999966399
3063,2,3,288,9.9999808
3126,3,2,96,9.999976
3127,2,1,288,9.9999664
3946,3,2,96,9.999961599
8340,1,2,288,9.999952
9418,1,2,288,9.999947199
9479,2,1,96,9.999942399
10299,2,3,96,9.9999712
10365,2,1,96,9.9999472
10758,2,3,288,9.999927999
11770,2,3,288,9.9999568
11832,3,2,96,9.999951999
11842,2,1,288,9.9999328
Code :
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Asker's original attempt, kept as posted except for the restored loop
# indentation (the flattened paste was a syntax error).
df = pd.read_csv('DS1.csv')
# groupby('Source') yields (source id, sub-frame) pairs, so the first loop
# name holds the Source value even though it is called Energy here.
for Energy,data in df.groupby('Source'):
    plt.plot(data['Time'], data['Energy'])
    # NOTE(review): the flattened paste does not show whether this call sat
    # inside the loop. Either way it hands the group's Source column to
    # legend() instead of labelling the line, and the bare plt.legend() below
    # then finds no labelled lines -- the problem this question describes.
    plt.legend(data['Source'])
    #print(data)
plt.xlabel('Time')
plt.ylabel('Energy')
plt.legend()
plt.show()
What I actually want is to plot Energy vs. Time for every source (1 to 4), with each line identified by its source.
You need to set the label.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
df = pd.read_csv('DS1.csv')
# Each group is one source node. Label its line with the node id so that
# plt.legend() below can build one legend entry per node.
for source, data in df.groupby('Source'):
    plt.plot(data['Time'], data['Energy'], label=source)
plt.xlabel('Time')
plt.ylabel('Energy')
plt.legend()
plt.show()
My Code:
import pandas as pd
import matplotlib.pyplot as plt
# Read the semicolon-delimited dataset and scatter the Maas column against
# the Deneyim column.
df = pd.read_csv("linear_regression_dataset.csv", sep=";")
deneyim = df["Deneyim"]
maas = df["Maas"]
plt.scatter(deneyim, maas)
plt.xlabel("deneyim")
plt.ylabel("maas")
plt.show()
Is there a solution proposal?
The graphic I want:
sort the dataframe first and then you can plot
import pandas as pd
import matplotlib.pyplot as plt
# Read the semicolon-delimited dataset.
df = pd.read_csv("linear_regression_dataset.csv", sep=";")
# The column is spelled 'Maas' everywhere else in this script; the original
# 'Mass' lookup raised KeyError. Cast to int so the values are plotted on a
# numeric axis.
df['Maas'] = df['Maas'].astype(int)
# Order the rows by Maas before plotting.
df = df.sort_values('Maas')
plt.scatter(df.Deneyim, df.Maas)
plt.xlabel("deneyim")
plt.ylabel("maas")
plt.show()
import matplotlib.pyplot as plt
import numpy as np
import matplotlib as mpl
import pandas as pd
# Location of the fee dataset; the first CSV column is used as the index.
filepath = 'E:/PROJECT ON DATA SCIENCE/boxplot/fee.csv'
X = pd.read_csv(filepath, index_col=0)
# One boxplot call per grouping column, each showing the 'fee' column.
for group_column in ('stype', 'pincode'):
    X.boxplot(column='fee', by=group_column)
If you want to boxplot X grouping by both stype and pincode you can use
# Group 'fee' by the combination of both columns in a single call.
X.boxplot(by=["stype", "pincode"], column="fee")
Complete code would be
import matplotlib.pyplot as plt
import numpy as np
import matplotlib as mpl
import pandas as pd
# Location of the fee dataset; the first CSV column is used as the index.
filepath = 'E:/PROJECT ON DATA SCIENCE/boxplot/fee.csv'
X = pd.read_csv(filepath_or_buffer=filepath, index_col=0)
# Draw 'fee' grouped by every (stype, pincode) combination.
X.boxplot(column='fee', by=['stype', 'pincode'])
# Without an explicit show() the figure never appears when this runs as a
# plain script (outside an inline notebook backend).
plt.show()
import pandas as pd
import numpy as np
from matplotlib.finance import candlestick_ohlc
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib.ticker as mticker
import io
import datetime
import urllib
import urllib.request
%matplotlib notebook
# Yahoo chart API endpoint for one year of GOOG quotes as CSV. The literal had
# been split across two lines in the paste, which is a syntax error; it is
# rejoined here without changing the URL.
urlToVisit = ('http://chartapi.finance.yahoo.com/instrument/1.0/GOOG/chartdata;'
              'type=quote;range=1y/csv')
with urllib.request.urlopen(urlToVisit) as response:
    sourcePage = response.read().decode('utf-8')
# Skip the 18 leading metadata lines; the first column (yyyymmdd) becomes the
# parsed date index, the remaining columns get the names below.
df = pd.read_csv(io.StringIO(sourcePage), skiprows=18, header=None, sep=",",
                 names=['date','closeP','highP','lowP','openP','volume'],
                 index_col= 0, parse_dates= True)
if 'volume' not in df:
    df['volume'] = np.zeros(len(df))
# NOTE: this line is the subject of the question and is intentionally left
# as posted. candlestick_ohlc expects each row to start with the time value
# (a Matplotlib date number), but the date lives in the index here and is
# therefore missing from .values, so the price columns are misread.
DATA = df[['openP', 'highP', 'lowP', 'closeP','volume']].values
f1 = plt.subplot2grid((6,4), (1,0), rowspan=6, colspan=4, axisbg='#07000d')
candlestick_ohlc(f1, DATA, width=.6, colorup='#53c156', colordown='#ff1717')
f1.grid('on')
# At most 15 major ticks, rendered as ISO dates.
f1.xaxis.set_major_locator(mticker.MaxNLocator(15))
f1.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
plt.subplots_adjust(left=.09, bottom=.14, right=.94, top=.95, wspace=.20, hspace=0)
plt.xlabel('Date')
plt.ylabel('Stock Price')
plt.show()
So here's the problem: when I try to plot with 'candlestick_ohlc', it only plots the volume bar chart. Why is this happening? I'm thinking that maybe the problem has to do with my dates. I'm using IPython Notebook, by the way. My data source is Yahoo Finance. Note that I skip the first 18 lines so that I get straight to the actual data, which looks like this:
20150302,569.7757,570.5834,557.2202,558.9953,2129600
20150303,572.0694,573.8146,564.9689,568.8881,1704700
20150304,571.8001,575.5299,566.4548,570.3043,1876800
20150305,573.7548,576.3277,571.8400,573.4456,1389600
20150306,566.1307,575.1011,565.2082,573.3060,1659100
20150309,567.2925,568.7086,561.9921,565.3079,1062100
date,close,high,low,open,volume
Any ideas? Would appreciate any help!!
So, with the help of @DSM,
# Move the date index back into a regular first column so that every row
# starts with its date, followed by open/high/low/close/volume.
price_columns = ['openP', 'highP', 'lowP', 'closeP', 'volume']
DATA = df[price_columns].reset_index()
# candlestick_ohlc needs the time as a Matplotlib date number, not a timestamp.
DATA["date"] = DATA["date"].apply(mdates.date2num)
f1 = plt.subplot2grid((6, 4), (1, 0), rowspan=6, colspan=4, axisbg='#07000d')
candlestick_ohlc(
    f1,
    DATA.values,
    width=.6,
    colorup='#53c156',
    colordown='#ff1717',
)
fixed the problem! Credits to him.