import requests
import pandas as pd
import plotly.graph_objects as go
from datetime import datetime

def income_statement(stock):
    number_yrs = input('how many yr(s)?').strip()
    api_key = 'd5abd8f1620e04709eeb05ebafa9af7e'
    IS = requests.get(f'https://financialmodelingprep.com/api/v3/balance-sheet-statement/{stock}?limit={number_yrs}&apikey={api_key}').json()
    IS = pd.DataFrame.from_dict(IS)
    IS = IS.T
    print(IS.T)
    save_to_csv = input('save_to_csv? y or n').strip()
    if save_to_csv == 'y':
        IS.to_csv('IS' + stock + '.csv')

while True:
    command = input('stock?')
    stock = comman.split(' ')[1]
    if comman == 'IS ' + stock:
        income_statement(stock)
    elif comman == 'quit':
        break
    else:
        print('Invalid Command.')
I am learning Python for finance right now. I would like to build a stock research terminal that lets me pull the income statement, balance sheet, historical prices, etc. of a stock from the Internet. I used the code above, but when I type IS TSLA it still gives me the income statement of AAPL. Which parts of the code are wrong? Thanks!
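Two things stand out in the loop as posted: the input is read into command but later referenced as comman, and 'quit' is only checked after the input has been split. Below is a minimal sketch of a loop that parses the ticker before branching and calls what is presumably FMP's income-statement endpoint rather than the balance-sheet one; the prompt text, the five-year default, and the placeholder API key are assumptions, not part of the original code:

import requests
import pandas as pd

def income_statement(stock, number_yrs, api_key):
    # The original function queried the balance-sheet-statement endpoint;
    # for an income statement, FMP's income-statement endpoint is presumably the one wanted.
    url = (f'https://financialmodelingprep.com/api/v3/income-statement/'
           f'{stock}?limit={number_yrs}&apikey={api_key}')
    data = requests.get(url).json()
    return pd.DataFrame.from_dict(data).T

while True:
    command = input('command? (e.g. IS TSLA, or quit) ').strip()
    if command == 'quit':
        break
    parts = command.split()
    if len(parts) == 2 and parts[0] == 'IS':
        ticker = parts[1].upper()  # use whatever ticker was typed, not a hard-coded one
        print(income_statement(ticker, 5, 'YOUR_API_KEY'))  # hypothetical placeholder key
    else:
        print('Invalid Command.')

The key point is that whatever ticker is typed is the one passed into the request, so IS TSLA fetches TSLA.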
In this program I am not using requests or BeautifulSoup; I'm only using datetime to build the URLs. As written, the program extracts values for a long period. I want to change it so that, if I automate it and it runs today, it extracts yesterday's data; similarly, if it runs tomorrow, it extracts today's data, and so on.
Here is the code:
import datetime
from datetime import date, datetime, timedelta
import warnings
import datetime
import pandas as pd
import wget
import glob
import os
warnings.filterwarnings("ignore")
import ssl
ssl._create_default_https_context = ssl._create_unverified_context
from urllib.error import HTTPError

def date_range(start_date, end_date):
    for n in range(int((end_date - start_date).days)):
        yield start_date + timedelta(n)

def get_urls(base_url):
    part_two = "/dailyCoal1-"
    end_part = ".xlsx"
    start_date = date(2020, 11, 1)
    end_date = datetime.datetime.now().date()
    start_urls = list()
    for single_date in date_range(start_date, end_date):
        start_urls.append(single_date.strftime(base_url + '%d-%m-%Y' + part_two + '%Y-%m-%d' + end_part))
    return start_urls

def excel_download(link, out):
    # downloads a given link provided to a output directory in out
    wget.download(link, out)

if __name__ == "__main__":
    base_url = "https://npp.gov.in/public-reports/cea/daily/fuel/"
    mypath = "/Users/vp/Desktop/temp"
    temp_folder = '/Users/vp/Desktop/temp'
    out_folder = "/Users/vp/Desktop/NPP"
    log_file = os.path.join(out_folder, 'debug_log_npp.log')
    out_file = os.path.join(out_folder, 'Energy_inputs_npp.csv')
    file_links = get_urls(base_url)
    for link in file_links:
        try:
            excel_download(link, temp_folder)
        except HTTPError:
            content = "HTTP issue while capturing data for this link - " + link
            log_writer(log_file, content)
            continue
        file = glob.glob(os.path.join(temp_folder, '*.xlsx'), recursive=True)[0]
        df = pd.read_excel(file)
To capture yesterday's data, I added the check below in the main function: I compare the extracted date against yesterday and discard the file if it doesn't match. But it throws an error because it always picks the start date as day one.
if(date_time_obj != Yesterday):
    os.remove(file)
    content = "Date mis-matched - " + str(date_time_obj) + " " + str(Yesterday)
In this program, date_time_obj is the date it is currently trying to extract data for.
If the program runs every day at 8 pm, it needs to capture only the previous day's data, on a daily basis.
If this cannot be done with datetime, but only with requests or bs4, how do I approach this problem?
I don't know if you wanted a valid link, as your code doesn't seem to produce those for me, but you only need to tweak it to work off start_date alone and return a single item: yesterday's link, matching your current output for the same date.
import datetime
from datetime import timedelta
import warnings
import pandas as pd
import wget
import glob
import os
warnings.filterwarnings("ignore")
import ssl
ssl._create_default_https_context = ssl._create_unverified_context
from urllib.error import HTTPError

def get_url(base_url):
    # Build the single URL for yesterday's file instead of a whole date range.
    part_two = "/dailyCoal1-"
    end_part = ".xlsx"
    start_date = datetime.datetime.now().date() + timedelta(-1)
    start_url = start_date.strftime(base_url + '%d-%m-%Y' + part_two + '%Y-%m-%d' + end_part)
    return start_url

def excel_download(link, out):
    # downloads a given link to the output directory out
    wget.download(link, out)

if __name__ == "__main__":
    base_url = "https://npp.gov.in/public-reports/cea/daily/fuel/"
    mypath = "/Users/vp/Desktop/temp"
    temp_folder = '/Users/vp/Desktop/temp'
    out_folder = "/Users/vp/Desktop/NPP"
    log_file = os.path.join(out_folder, 'debug_log_npp.log')
    out_file = os.path.join(out_folder, 'Energy_inputs_npp.csv')
    file_link = get_url(base_url)
    print(file_link)
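For completeness, a hedged sketch, not part of the original answer, of how the main block could then download just that single file and read it. It reuses get_url, excel_download, and the imports from the code above, and a plain print stands in for the question's log_writer helper:

if __name__ == "__main__":
    base_url = "https://npp.gov.in/public-reports/cea/daily/fuel/"
    temp_folder = '/Users/vp/Desktop/temp'
    file_link = get_url(base_url)                     # yesterday's link only
    try:
        excel_download(file_link, temp_folder)
        file = glob.glob(os.path.join(temp_folder, '*.xlsx'), recursive=True)[0]
        df = pd.read_excel(file)
        print(df.head())
    except HTTPError:
        # stand-in for the question's log_writer(log_file, content)
        print("HTTP issue while capturing data for this link - " + file_link)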
I've studied the PyOWM and OWM API guides and some code recipes, and have things working fine in a larger bit of code, but I just can't work out how to get the current weather icon for today's forecast. Here's a code snippet that works apart from the icon:
from pyowm.owm import OWM
from pyowm.utils import timestamps
import requests, json
import time

def checkInternetRequests(url='http://www.google.com/', timeout=3):
    try:
        r = requests.head(url, timeout=timeout)
        return True
    except requests.ConnectionError as ex:
        print(ex)
        return False

def get_weather():
    if checkInternetRequests() == True:
        # 52.5766° N, 1.5438° W
        print("Internet Up")
        CITY = "Atherstone, GB"
        LON = 1.54
        LAT = 52.576
        API_KEY = "myapikey"
        ICON_URL_1 = "http://openweathermap.org/img/wn/"
        ICON_URL_2 = "@2x.png"
        owm = OWM(API_KEY)
        mgr = owm.weather_manager()
        one_call = mgr.one_call(lat=LAT, lon=LON)
        print(one_call.forecast_hourly[3])  # works - 3hrs from now
        print("===========================")
        print(one_call.forecast_daily[0])   # works - Today
        print("===========================")
        print(one_call.forecast_daily[0].weather.icon)
    else:
        print("Internet Down")
        time.sleep(10)

get_weather()
I'm getting the following error:
Traceback (most recent call last):
File "weather1.py", line 37, in <module>
get_weather()
File "weather1.py", line 31, in get_weather
print (one_call.forecast_daily[0].weather.icon)
AttributeError: 'Weather' object has no attribute 'weather'
Where am I going wrong? Thanks!
OK - after a few beers, some food, and 4 hrs of messing with the code, I have this working!
one_call.forecast_daily[0].weather_icon_url()
gives me a URL I can use to display today's forecast icon in my code.
import pyowm
from pyowm.utils import timestamps, formatting
from pyowm.owm import OWM
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

owm = OWM('your-api-key')              # placeholder API key
mgr = owm.weather_manager()
observation = mgr.weather_at_place('London')
w = observation.weather
icon = w.weather_icon_url(size='2x')   # URL of the current weather icon
img = mpimg.imread(icon)               # note: reading straight from a URL may only work on older matplotlib versions
plt.imshow(img)
plt.show()
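If reading the image straight from its URL fails on newer matplotlib versions, one alternative (a sketch using requests and Pillow, which are assumptions here and not part of the original answers) is to download the icon first and then display it:

import requests
from io import BytesIO
from PIL import Image
import matplotlib.pyplot as plt

# icon_url could come from either approach above, e.g.
# one_call.forecast_daily[0].weather_icon_url() or w.weather_icon_url(size='2x')
icon_url = "http://openweathermap.org/img/wn/01d@2x.png"  # example icon code

response = requests.get(icon_url, timeout=10)
img = Image.open(BytesIO(response.content))

plt.imshow(img)
plt.axis('off')
plt.show()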
I am doing a project on a notification program that reads an Excel file as you edit it and then sends a notification at the time you set for the reminder.
The code is here:
import pandas as pd
import datetime
from plyer import notification

def remind(title, message):
    notification.notify(title=title, message=message, timeout=10)

if __name__ == '__main__':
    while True:
        df = pd.read_excel("./reminder.xlsx")
        for index, item in df.iterrows():
            time = datetime.datetime.now().strftime("%H:%M:%S")
            if time == item['Time']:
                remind(item['Title'], item['Message'])
            else:
                continue
The program runs fine, but the reminder is not popping up.
I am working on Python 3.7.9 on Windows 8 (64-bit).
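The likely cause is a type mismatch in the comparison: pandas usually reads an Excel time cell as a datetime.time (or Timestamp), while strftime returns a plain str, so the equality never holds. A minimal sketch of the mismatch (the exact dtype of the Time column is an assumption):

import datetime

cell = datetime.time(14, 30, 0)              # what pandas often returns for an Excel time cell
now_str = "14:30:00"                         # what strftime("%H:%M:%S") produces: a plain str

print(now_str == cell)                       # False: a str never equals a datetime.time
print(now_str == cell.strftime("%H:%M:%S"))  # True: compare strings with strings instead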
import pandas as pd
import datetime
from plyer import notification
import time

def notifyMe(title, message, icon):
    notification.notify(
        title=title,
        message=message,
        app_icon=icon,
        timeout=10)

if __name__ == '__main__':
    notifyMe("Hello", "We are in together in these tough time", "virusicon.ico")
    df = pd.read_excel("reminder.xlsx")
    # Cast the columns to str so empty cells become the string "nan" and can be blanked out below.
    df["Icon"] = df["Icon"].astype(str)
    df["Heading"] = df["Heading"].astype(str)
    df["Message"] = df["Message"].astype(str)
    number_of_rows = len(df.index)
    while True:
        flag = True
        for item in range(0, number_of_rows):
            # Compare times with second precision by round-tripping both through strftime/strptime.
            now = datetime.datetime.strptime(datetime.datetime.now().strftime("%H:%M:%S"), "%H:%M:%S")
            x = datetime.datetime.strptime(df['Time'][item].strftime("%H:%M:%S"), "%H:%M:%S")
            if x == now:
                print(True)
                print(df['Heading'][item], df['Message'][item], df["Icon"][item])
                if df["Icon"][item] == "nan":
                    df.loc[item, "Icon"] = ""
                if df["Message"][item] == "nan":
                    df.loc[item, "Message"] = ""
                if df["Heading"][item] == "nan":
                    df.loc[item, "Heading"] = ""
                notifyMe(df['Heading'][item], df['Message'][item], df['Icon'][item])
            time.sleep(1)
            if now <= x:
                flag = False
        if flag:
            # All reminder times are in the past, so stop polling.
            exit()
Format of the xlsx file (screenshot):
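Judging from the column names used in the code above, a hedged sketch of what reminder.xlsx presumably contains; the rows below are purely illustrative:

import datetime
import pandas as pd

# Illustrative contents only; the real file is edited by hand in Excel.
df = pd.DataFrame({
    "Time":    [datetime.time(9, 0, 0), datetime.time(18, 30, 0)],
    "Heading": ["Stand-up", "Backup"],
    "Message": ["Daily stand-up meeting", "Run the nightly backup"],
    "Icon":    ["virusicon.ico", ""],
})
df.to_excel("reminder.xlsx", index=False)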
You can try this; it may help you.
I have successfully implemented a recommendation engine, but there is a problem: if I enter any unrelated value it still produces output, whereas it should respond with "you have entered a wrong value".
Here is the output for a correct SMILES (screenshot).
If I enter a wrong SMILES, or anything that does not belong to the training dataset, it should instead return a message asking for a correct SMILES.
Here is the wrong output (screenshot): I entered random text, so for a wrong SMILES the result should be:
result: "Please enter correct smiles"
Below is my code. I tried if/else, but it is not working.
from rdkit import Chem
from rdkit.Chem import Draw
import pandas as pd
from flask import Flask, jsonify, request, abort
import json
import sys
import random
import unicodedata
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

data = pd.read_csv("clean_o2h.csv", sep=",")
app = Flask(__name__)

@app.route('/', methods=["POST"])
def predict_word():
    print(request.get_json())
    sent = request.get_json()['smiles']
    reactants = data["reactants"].tolist()
    targets = data["targets"].tolist()
    error = ("plese enter correct smiles")
    # TFIDF vector representation
    vectorizer = TfidfVectorizer()
    X = vectorizer.fit_transform(targets)
    test = vectorizer.transform([sent])
    #test = vectorizer.transform(["NC1=CC=C2C(COC(N[C#H]3C4=C(CC3)C=CC=C4)=N2)=C1"])
    cosine_similarities = cosine_similarity(test, X).flatten()
    l = []
    # n = ["Result 1","Result 2", "Result 3","Result 4"]
    # Extract top 5 similarity records
    similarity = cosine_similarities.argsort()[:-5:-1]
    #print("Top 5 recommendations...")
    for sim in similarity:
        #print(reactants[sim])
        result = reactants[sim]
        l.append(result)
    print(l)
    # output = dict(zip(l,n))
    res = {i: l[i] for i in range(0, len(l))}
    # return jsonify({"Recommendation": res})
    if (sent == targets):
        return jsonify({"Recommendation": res})
    else:
        return jsonify({"Error": error})

if __name__ == '__main__':
    app.run(port='8080')
Please help me with the correct logic: here the targets column holds the SMILES and the reactants column holds the recommendations.
You can check whether the SMILES are valid by parsing the molecule using RDKit.
This should answer part of your question, but I'm sorry I don't understand what else you are trying to achieve here.
from rdkit import Chem

# Inside the Flask view, before computing recommendations:
error = 'something is wrong'
smiles = request.get_json()['smiles']
m = Chem.MolFromSmiles(smiles)
if m is None:
    # RDKit could not parse the input, so it is not a valid SMILES string
    return jsonify({"Error": error})
else:
    # you have valid smiles
    pass
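A hedged sketch of how that check might slot into the question's predict_word view, returning early for an invalid SMILES before any TF-IDF work (the route and column names come from the question; everything else is an assumption):

from flask import Flask, jsonify, request
from rdkit import Chem

app = Flask(__name__)

@app.route('/', methods=["POST"])
def predict_word():
    sent = request.get_json()['smiles']

    # Reject anything RDKit cannot parse before doing any similarity work.
    if Chem.MolFromSmiles(sent) is None:
        return jsonify({"Error": "Please enter correct smiles"})

    # ...otherwise continue with the TF-IDF / cosine-similarity recommendation
    # logic from the question and return jsonify({"Recommendation": res}).
    return jsonify({"Recommendation": {}})  # placeholder for the real result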
I have developed the code below for a web-crawling object.
It takes two dates as inputs, creates a list of the dates between them, and attaches each one to the URL of a web page that contains weather information for a location. It then converts the HTML tables of data into a DataFrame and stores the data as a CSV file (the base link is: https://www.wunderground.com/history/daily/ir/mashhad/OIMM/date/2019-1-3 and, as you can see in this example, the date is 2019-1-3):
from datetime import timedelta, date
from bs4 import BeautifulSoup
from selenium import webdriver
import pandas as pd
from furl import furl
import os
import time

class WebCrawler():
    def __init__(self, st_date, end_date):
        if not os.path.exists('Data'):
            os.makedirs('Data')
        self.path = os.path.join(os.getcwd(), 'Data')
        self.driver = webdriver.PhantomJS()
        self.base_url = 'https://www.wunderground.com/history/daily/ir/mashhad/OIMM/date/'
        self.st_date = st_date
        self.end_date = end_date

    def date_list(self):
        # Create list of dates between two dates given as inputs.
        dates = []
        total_days = int((self.end_date - self.st_date).days + 1)
        for i in range(total_days):
            date = self.st_date + timedelta(days=i)
            dates.append(date.strftime('%Y-%m-%d'))
        return dates

    def create_link(self, attachment):
        # Attach dates to base link
        f = furl(self.base_url)
        f.path /= attachment
        f.path.normalize()
        return f.url

    def open_link(self, link):
        # Opens link and visits page and returns html source code of page
        self.driver.get(link)
        html = self.driver.page_source
        return html

    def table_to_df(self, html):
        # Finds table of weather data and converts it into pandas dataframe and returns it
        soup = BeautifulSoup(html, 'lxml')
        table = soup.find("table", {"class": "tablesaw-sortable"})
        dfs = pd.read_html(str(table))
        df = dfs[0]
        return df

    def to_csv(self, name, df):
        # Save the dataframe as csv file in the defined path
        filename = name + '.csv'
        df.to_csv(os.path.join(self.path, filename), index=False)
This is the way I want to use the WebCrawler object:
date1 = date(2018, 12, 29)
date2 = date(2019, 1, 1)

# Initialize WebCrawler object
crawler = WebCrawler(st_date=date1, end_date=date2)
dates = crawler.date_list()

for day in dates:
    print('**************************')
    print('PROCESSING : ', day)
    link = crawler.create_link(day)
    print('WAITING... ')
    time.sleep(3)
    print('VISIT WEBPAGE ... ')
    html = crawler.open_link(link)
    print('DATA RETRIEVED ... ')
    df = crawler.table_to_df(html)
    print(df.head(3))
    crawler.to_csv(day, df)
    print('DATA SAVED ...')
The problem is that the first iteration of the loop runs perfectly, but the second one stops with an error saying "No tables were found" (it occurs at the table = soup.find("table",{"class":"tablesaw-sortable"}) line). That happens because the page source is returned by WebCrawler.open_link before the web page has fully loaded its contents, including the table with the weather information. There is also a chance that the website rejects the request because it is making the servers too busy.
Is there any way to build a loop that keeps trying to open the link until it can find the table, or at least waits until the table has loaded before returning the page source?
You can have Selenium wait for a specific element. In your case it will be the table with the class name "tablesaw-sortable". I highly recommend that you use a CSS selector to find this element, as it is fast and less error-prone than getting all table elements.
Here is the CSS selector, premade for you: table.tablesaw-sortable. Set Selenium to wait until that element has loaded.
Source: https://stackoverflow.com/a/26567563/4159473
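A minimal sketch of that wait, using the CSS selector recommended above (the seven-second timeout and the driver and link variables are assumptions, taken to be the already-created WebDriver and the page URL):

from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By

driver.get(link)  # driver is the already-created Selenium WebDriver

# Block for up to 7 seconds until the weather table is present in the DOM.
WebDriverWait(driver, 7).until(
    EC.presence_of_element_located((By.CSS_SELECTOR, 'table.tablesaw-sortable'))
)
html = driver.page_source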
I rewrote the code using the https://stackoverflow.com/a/26567563/4159473 solution suggested by @mildmelon, and I also added some delay between sending each request to the server and asking for the page source:
from datetime import timedelta, date
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException
import pandas as pd
from furl import furl
import os
import time

class WebCrawler():
    def __init__(self, st_date, end_date):
        if not os.path.exists('Data'):
            os.makedirs('Data')
        self.path = os.path.join(os.getcwd(), 'Data')
        self.driver = webdriver.PhantomJS()
        self.delay_for_page = 7
        self.base_url = 'https://www.wunderground.com/history/daily/ir/mashhad/OIMM/date/'
        self.st_date = st_date
        self.end_date = end_date

    def date_list(self):
        # Create list of dates between two dates given as inputs.
        dates = []
        total_days = int((self.end_date - self.st_date).days + 1)
        for i in range(total_days):
            date = self.st_date + timedelta(days=i)
            dates.append(date.strftime('%Y-%m-%d'))
        return dates

    def create_link(self, attachment):
        # Attach dates to base link
        f = furl(self.base_url)
        f.path /= attachment
        f.path.normalize()
        return f.url

    def open_link(self, link):
        # Opens link and waits until the weather table is present in the page
        self.driver.get(link)
        myElem = WebDriverWait(self.driver, self.delay_for_page)\
            .until(EC.presence_of_element_located((By.CLASS_NAME, 'tablesaw-sortable')))

    def table_to_df(self, html):
        # Finds table of weather data and converts it into pandas dataframe and returns it
        soup = BeautifulSoup(html, 'lxml')
        table = soup.find("table", {"class": "tablesaw-sortable"})
        dfs = pd.read_html(str(table))
        df = dfs[0]
        return df

    def to_csv(self, name, df):
        # Save the dataframe as csv file in the defined path
        filename = name + '.csv'
        df.to_csv(os.path.join(self.path, filename), index=False)

date1 = date(2019, 2, 1)
date2 = date(2019, 3, 5)

# Initialize WebCrawler object
crawler = WebCrawler(st_date=date1, end_date=date2)
dates = crawler.date_list()

for day in dates:
    print('**************************')
    print('DATE : ', day)
    link = crawler.create_link(day)
    print('WAITING ....')
    print('')
    time.sleep(12)
    print('OPENING LINK ... ')
    try:
        crawler.open_link(link)
        html = crawler.driver.page_source
        print("DATA IS FETCHED")
        df = crawler.table_to_df(html)
        print(df.head(3))
        crawler.to_csv(day, df)
        print('DATA SAVED ...')
    except TimeoutException:
        print("NOT FETCHED ...!!!")
The weather information is now fetched without problems. I guess the delays between requests resulted in better performance. The line myElem = WebDriverWait(self.driver, self.delay_for_page).until(EC.presence_of_element_located((By.CLASS_NAME, 'tablesaw-sortable'))) has also improved speed.
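One small addition, not part of the original answer: the PhantomJS process keeps running after the loop finishes, so it is worth quitting the driver once the crawl is done:

# After the download loop has finished, shut down the headless browser process.
crawler.driver.quit()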