Multiple plots against Date and Time using MatPlotLib and Sql3 - python-3.x

This may have been asked before but I have been unable to find it.
I am a newbie to programming and working on a project to actively monitor and record voltage levels of four devices. These details are stored in a sqlite3 db with the date and time they were taken.
I am now trying to create a plot in Matplotlib with all four traces on the same graph, using the SQLite3 data. I have got one trace working, although it is a little messy.
Here is my code so far:
import sqlite3
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from dateutil import parser
import time
import datetime
conn = sqlite3.connect('Test.db')
c = conn.cursor()


def graph_data():
    """Plot Battery_Level1 against the DTG timestamp for every row of Voltages.

    Reads through the module-level cursor ``c`` and displays the figure with
    ``plt.show()``. All four voltage columns are selected, but only the first
    one (``row[1]``) is drawn.
    """
    # NOTE(review): 'Battery_Leve3' looks like a misspelling of
    # 'Battery_Level3' - confirm against the table schema before changing it.
    c.execute('SELECT DTG, Battery_Level1, Battery_Level2, Battery_Leve3, Battery_Level4 FROM Voltages')
    data = c.fetchall()
    dates = []
    values = []
    plt.xlabel('Time')
    plt.ylabel('Voltage')
    plt.title('PiPower Manager')
    for row in data:
        # DTG is stored as text, so parse it into a datetime for the x axis.
        dates.append(parser.parse(row[0]))
        values.append(row[1])
    plt.plot_date(dates, values, '-')
    plt.show()


graph_data()
# close() must be called; a bare ``c.close`` only names the method.
c.close()
conn.close()
However, whenever I try to add the other lines, it causes an error. Any help would be appreciated.
Update 1
I have mostly tried playing with the section
for row in data:
dates.append(parser.parse(row[0]))
values.append(row[1])
I have added a new section similar as below
for row in data:
dates.append(parser.parse(row[0]))
values.append(row[2])
This does graph but looks odd - If I hash out the original it plots ok.
I then tried adding row[3] and row[4], but I get an error:
Traceback (most recent call last):
File "/home/pi/plot.py", line 41, in <module>
graph_data()
File "/home/pi/plot.py", line 28, in graph_data
values.append(row[3])
IndexError: tuple index out of range
I am not really sure how to go about this......

Related

How can I link this file to my .ipynb file to collect frequent data from the first dataset to the 9th dataset

(data set image) Please use Python. I'm a beginner in frequent-pattern data mining and I'm trying to understand it, so please be as simple and detailed as possible.
I tried using the for loop to collect data from a range but I'm still learning so I don't know how to implement it (keeps giving me the error "index 1 is out of bounds for axis 1 with size 1"). Please guide me.
NB: I was trying to construct a data frame but I don't know how to. Help me with that too
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import csv
# Read the raw file; header=None keeps the first line as data, not column names.
# NOTE(review): read_csv splits on commas by default. The reported error
# ("index 1 is out of bounds for axis 1 with size 1") suggests the file was
# parsed into a single column - confirm the delimiter used in retail.txt.
Data = pd.read_csv('retail.txt', header = None)
# Initializing the list
transacts = []
# Populating the list of transactions.
# NOTE(review): range(1, 9) covers row indices 1..8, which skips the first
# row (index 0) and stops before the ninth - confirm the intended rows.
for i in range(1, 9):
    transacts.append([str(Data.values[i,j]) for j in range(1, 2000)])
# Empty frame; the collected transactions are not placed in it yet.
df = pd.DataFrame()

Struggling to display the right (formatted) value for matplotlib labels

Guide:
https://theoehrly.github.io/Fast-F1/examples_gallery/plot_qualifying_results.html#sphx-glr-examples-gallery-plot-qualifying-results-py
I am having trouble displaying the correct value or formatted form as a matplotlib label.
Issue: Bar graph labels are displayed as unwanted or badly formatted values.
(Is this TimeDelta[ns] in an integer under scientific notation? The dtype is timedelta64[ns])
Expected Values: The amount of time each driver is from the leader (s.ms) (HAM=0.038). Note: order is the same
print(times)
Code:
#!/usr/bin/python3-64
#required packages
#pip3 install fastf1
#pip3 install pandas
#pip3 install matplotlib
#pip3 install numpy
import matplotlib.pyplot as plt
import matplotlib.patches as pat
import fastf1 as ff1
import fastf1.plotting as ff1p
ff1p.setup_mpl(mpl_timedelta_support=True, color_scheme=None, misc_mpl_mods=False)
from fastf1.core import Laps
import pandas as pd
import numpy as np
from timple.timedelta import strftimedelta as td
import os
l = str.lower


def data_cache():
    """Ask for confirmation, then create and enable the Fast F1 cache folder.

    Keeps prompting until the user answers 'y' or 'n' (case and surrounding
    whitespace are ignored). On 'n' the process exits with status 0. On 'y'
    the cache directory is created if it is missing and handed to
    ``ff1.Cache.enable_cache``.
    """
    cache = '/ff1_temp'  # temp cache
    while True:
        answer = input(l(f'!WARNING! A data cache will be made at {cache}\n'
                         f'Formula 1 Race Data will be downloaded to {cache}\n'
                         f'Would you like to continue? [y/n]\n'))
        # Normalise so that 'Y', ' y' and similar are accepted as well.
        warn = answer.strip().lower()
        if warn == 'n':
            print('Quitting!\n')
            exit(0)
        elif warn == 'y':
            print(f'cache location: {cache}\n')
            # exist_ok avoids the check-then-create race of exists() + mkdir().
            os.makedirs(cache, exist_ok=True)
            ff1.Cache.enable_cache(cache)  # Fast F1 Cache API
            break
        else:
            print('Plese Enter [y/n]\n')
def data_load():
    """Load the 2021 Netherlands qualifying session through Fast F1.

    Only lap data is requested; telemetry, weather and messages are skipped.

    Returns:
        The session object obtained from ``ff1.get_session`` after ``load``.
    """
    data = ff1.get_session(2021, 'Netherlands', 'Q')  # year, location, session
    data.load(laps=True, telemetry=False, weather=False, messages=False)
    return data
def data_graph():
    """Collect every driver's fastest lap and its gap to the overall fastest.

    Returns:
        A tuple ``(fl, tc, flf)``: the fastest laps sorted by ``LapTime`` with
        an added ``LapTimeDelta`` column, the team colours in the same order,
        and the overall fastest lap.
    """
    data = data_load()
    drivers = pd.unique(data.laps['DriverNumber'])
    # Fastest lap of the session for each driver.
    fll = [data.laps.pick_driver(number).pick_fastest() for number in drivers]
    fl = Laps(fll).sort_values(by='LapTime').reset_index(drop=True)
    flf = fl.pick_fastest()
    fl['LapTimeDelta'] = fl['LapTime'] - flf['LapTime']  # gap to the leader
    # Team colours, in the same order as the sorted laps.
    tc = [ff1p.team_color(lap['Team']) for _, lap in fl.iterlaps()]
    return fl, tc, flf
def data_plot():
    """Draw a horizontal bar chart of each driver's gap to the fastest lap.

    Uses the laps, colours and fastest lap from ``data_graph()``, labels the
    y axis with driver codes, prints the deltas, and shows the figure.
    """
    fl, tc, flf = data_graph()
    fig, ax = plt.subplots()
    times = fl['LapTimeDelta']
    fli = fl.index
    #              y    x
    bars = ax.barh(fli, times, color=tc, edgecolor='grey')
    print(times)  # expected values
    ax.set_yticks(fl.index)
    ax.set_yticklabels(fl['Driver'])
    ax.set_xlabel('Time Difference (ms)')
    # Only the container is passed here, so bar_label falls back to its
    # default formatting of the bar values rather than the strings in `times`.
    ax.bar_label(bars)  # (times)
    ax.invert_yaxis()  # fastest driver at the top
    lt = td(flf['LapTime'], '%m:%s.%ms')
    plt.suptitle(f'2021 Dutch GP Qualifying\n'
                 f"Fastest at {lt} ({flf['Driver']})")
    plt.show()
if __name__ == "__main__":
    # Enable the cache before plotting, since plotting triggers the session load.
    data_cache()
    data_plot()
    exit(0)
results of print(bars)
results of print(type(times)) and print(type(bars))
What has been Attempted:
def data_plot():
ax.bar_label(times)
Traceback (most recent call last):
File "\python\datacollection\fp1.ff1.graph.py", line 144, in <module>
data_plot()
File "\python\datacollection\fp1.ff1.graph.py", line 132, in data_plot
ax.bar_label(times)
File "\Python\Python310\lib\site-packages\matplotlib\axes\_axes.py", line 2609, in bar_label
bars = container.patches
File "\Python\Python310\lib\site-packages\pandas\core\generic.py", line 5575, in __getattr__
return object.__getattribute__(self, name)
AttributeError: 'Lap' object has no attribute 'patches'
---
def data_plot_label(fli,times):
for i in range(len(fli)):
plt.text(i,times[i],times[i],ha='center',bbox=dict(alpha=0.8))
def data_plot():
data_plot_label(fli,times)
Close:
I'm still pretty green with this stuff,
Am I going about this correctly?
What are my options regarding labelling and matplotlib?
How do I set the correct formatted value for this label?
I find the graph is harder to understand without the actual values on it. It has less depth.
Relevant Docs:
https://theoehrly.github.io/Fast-F1/
https://pandas.pydata.org/docs/reference/index.html
https://matplotlib.org/stable/api/index
I overlooked something in the docs: I was passing only the container and not specifying the labels.
Reference:
https://matplotlib.org/stable/api/_as_gen/matplotlib.axes.Axes.bar_label.html#matplotlib.axes.Axes.bar_label
Axes.bar_label(container, labels=None, *, fmt='%g', label_type='edge', padding=0, **kwargs)
Solution:
prfx='0 days 00:00:0'
sufx='000'
remov=''
def data_plot():
#removes leading and tailingzeros
times=times.astype(str).str.replace(prfx,remov).str.replace(sufx,remov)
#before: 0 days 00:00:0x.xxx000
#after: x.xxx
#over looked label, label_type=position-on-bar
ax.bar_label(bars, times, label_type='edge')
Just a little more formatting and it should look great!

Programming a simple Stock prediction service with Alpha Vantage in Python. I get this error

This is the program for the stock prediction to be simply printed...
from alpha_vantage.timeseries import TimeSeries
# Your key here
key = 'yourkeyhere'
# No output_format is given, so the default is used (JSON / python dict,
# per the comment in the second snippet).
ts = TimeSeries(key)
# get_daily returns a (data, metadata) pair.
aapl, meta = ts.get_daily(symbol='AAPL')
# NOTE(review): '2020-22-5' has 22 in the month position. The daily keys are
# presumably 'YYYY-MM-DD' strings such as '2020-05-22' - confirm by printing
# the dict's keys. This lookup is what raises the KeyError shown below.
print(aapl['2020-22-5'])
I get this error...
Traceback (most recent call last):
File "C:/Users/PycharmProjects/AlphaVantageTest/AlphaVantageTest.py", line 7, in <module>
print(aapl['2020-22-5'])
KeyError: '2020-22-5'
Since that didn't work, I tried getting a little more technical with it...
from alpha_vantage.timeseries import TimeSeries
from alpha_vantage.techindicators import TechIndicators
from matplotlib.pyplot import figure
import matplotlib.pyplot as plt
# Your key here. Do not publish a real API key; use a placeholder in shared code.
key = 'yourkeyhere'
# Choose your output format, or default to JSON (python dict)
# output_format='pandas' needs the pandas package to be installed.
ts = TimeSeries(key, output_format='pandas')
ti = TechIndicators(key)
# Get the data, returns a tuple
# aapl_data is a pandas dataframe, aapl_meta_data is a dict
aapl_data, aapl_meta_data = ts.get_daily(symbol='AAPL')
# aapl_sma is a dict, aapl_meta_sma also a dict
aapl_sma, aapl_meta_sma = ti.get_sma(symbol='AAPL')
# Visualization
figure(num=None, figsize=(15, 6), dpi=80, facecolor='w', edgecolor='k')
aapl_data['4. close'].plot()
plt.tight_layout()
plt.grid()
plt.show()
I get these errors...
Traceback (most recent call last):
File "C:/Users/PycharmProjects/AlphaVantageTest/AlphaVantageTest.py", line 9, in <module>
ts = TimeSeries(key, output_format='pandas')
File "C:\Users\PycharmProjects\AlphaVantageTest\venv\lib\site-packages\alpha_vantage\alphavantage.py", line 66, in __init__
raise ValueError("The pandas library was not found, therefore can "
ValueError: The pandas library was not found, therefore can not be used as an output format, please install manually
How can I improve my program so that I don't receive these errors? None of these programs are bad syntax wise. Thank you to anyone that can help.
You need to install pandas. If you're just using pip, you can run pip install pandas; if you are using conda to manage your environments, you can use conda install pandas.
Glad it worked. According to this meta overflow post: What if I answer a question in a comment?
I am posting my comment as an answer so you can mark the question as answered.

module 'seaborn' has no attribute 'distplot'

I've some code like:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Load the students' performance dataset.
data = pd.read_csv('StudentsPerformance.csv')
# print(data.isnull().sum())  # check whether there are missing values
# print(data.dtypes)  # check the datatypes of the dataset
# Analysis of the values in each column (disabled by wrapping it in a string)
"""print(data['gender'].value_counts())
print(data['parental level of education'].value_counts())
print(data['race/ethnicity'].value_counts())
print(data['lunch'].value_counts())
print(data['test preparation course'].value_counts())"""
# Adding column total and average to the dataset
data['total'] = data['math score'] + data['reading score'] + data['writing score']
data['average'] = data ['total'] / 3
# Distribution plot of the per-student average; this is the call that raises
# the AttributeError reported below.
sns.distplot(data['average'])
I would like to see distplot of average for visualization but I run the program that gives me an error like
Traceback (most recent call last): File
"C:/Users/usersample/PycharmProjects/untitled1/sample.py", line 22, in
sns.distplot(data['average']) AttributeError: module 'seaborn' has no attribute 'distplot'
I've tried to reinstall and install seaborn and upgrade the seaborn to 0.9.0 but it doesn't work.
head of my data female,"group B","bachelor's
degree","standard","none","72","72","74" female,"group C","some
college","standard","completed","69","90","88" female,"group
B","master's degree","standard","none","90","95","93" male,"group
A","associate's degree","free/reduced","none","47","57","44"
This might be due to paths having been removed from the environment variables section. Try adding your IDE scripts and Python folder to the path. I am using the PyCharm IDE; I did the same and it is working fine.

Keyerror when adding a column to a Dataframe (Pandas)

Pandas DataFrame is not really accepting adding a second column, and I cannot really troubleshoot the issue. I am trying to display Moving Averages. The code works fine just for the first one (MA_9), and gives me error as soon I try to add additional MA (MA_20).
Is it not possible in this case to add more than one column?
The code:
import numpy as np
import pandas as pd
import pandas_datareader as pdr
import matplotlib.pyplot as plt
# Ticker and date range passed to the 'stooq' data reader.
symbol = 'GOOG.US'
start = '20140314'
end = '20180414'
google = pdr.DataReader(symbol, 'stooq', start, end)
print(google.head())
# Keep only the Close column, as a one-column DataFrame.
google_close = pd.DataFrame(google.Close)
# NOTE(review): last_valid_index is not called here (no parentheses), so this
# prints the bound method rather than an index value.
print(google_close.last_valid_index)
# Works because the frame still has a single column (Close) at this point.
google_close['MA_9'] = google_close.rolling(9).mean()
# Fails: the frame now holds Close and MA_9, so the rolling mean is no longer
# a single column. The answer below selects the Close column explicitly.
google_close['MA_20'] = google_close.rolling(20).mean()
# google_close['MA_60'] = google_close.rolling(60).mean()
# print(google_close)
plt.figure(figsize=(15, 10))
plt.grid(True)
# display MA's
plt.plot(google_close['Close'], label='Google_Cls')
plt.plot(google_close['MA_9'], label='MA 9 day')
plt.plot(google_close['MA_20'], label='MA 20 day')
# plt.plot(google_close['MA_60'], label='MA 60 day')
plt.legend(loc=2)
plt.show()
Please update your code as below and then it should work:
google_close['MA_9'] = google_close.Close.rolling(9).mean()
google_close['MA_20'] = google_close.Close.rolling(20).mean()
Initially there was only one column, Close, so your old code google_close['MA_9'] = google_close.rolling(9).mean() worked. After that line runs, the DataFrame has two columns (Close and MA_9), so pandas no longer knows which column you want to average. Specifying the column you want the mean of makes it work: google_close['MA_20'] = google_close.Close.rolling(20).mean()

Resources