Subtraction between 'dict_values' and 'float' - python-3.x

I am getting the error "TypeError: unsupported operand type(s) for -: 'dict_values' and 'float'" from line 173 in the sample code. I have copied from a book that does not yet seem to be updated to Python 3 and other forum topics don't seem to cover this problem.
It is trying to calculate the error in an optimsation for the difference in market values and model values, but the data storage type is different across the two.
Thanks
import numpy as np
import pandas as pd
import datetime as dt
import matplotlib.pyplot as plt
import calendar
# frame
from get_year_deltas import get_year_deltas
from constant_short_rate import constant_short_rate
from market_environment import market_environment
from plot_option_stats import plot_option_stats
# simulation
from sn_random_numbers import sn_random_numbers
from simulation_class import simulation_class
from geometric_brownian_motion import geometric_brownian_motion
from jump_diffusion import jump_diffusion
from square_root_diffusion import square_root_diffusion
# valuation
from valuation_class import valuation_class
from valuation_mcs_european import valuation_mcs_european
from valuation_mcs_american import valuation_mcs_american
from derivatives_position import derivatives_position
from derivatives_portfolio import derivatives_portfolio
#import os
#path = os.getcwd()
url = 'http://www.stoxx.com/download/historical_values/h_vstoxx.txt'
vstoxx_index = pd.read_csv(url, index_col=0, header=2,parse_dates=True, dayfirst=True)
vstoxx_index = vstoxx_index[('2013/12/31' < vstoxx_index.index) & (vstoxx_index.index < '2014/4/1')]
vstoxx_futures = pd.read_excel('./vstoxx_march_2014.xlsx', 'vstoxx_futures')
del vstoxx_futures['A_SETTLEMENT_PRICE_SCALED']
del vstoxx_futures['A_CALL_PUT_FLAG']
del vstoxx_futures['A_EXERCISE_PRICE']
del vstoxx_futures['A_PRODUCT_ID']
columns = ['DATE', 'EXP_YEAR', 'EXP_MONTH', 'PRICE']
vstoxx_futures.columns = columns
def third_friday(date):
day = 21 - (calendar.weekday(date.year, date.month, 1) + 2) % 7
return dt.datetime(date.year, date.month, day)
set(vstoxx_futures['EXP_MONTH'])
third_fridays = {}
for month in set(vstoxx_futures['EXP_MONTH']):
third_fridays[month] = third_friday(dt.datetime(2014, month, 1))
#third_fridays
tf = lambda x: third_fridays[x]
vstoxx_futures['MATURITY'] = vstoxx_futures['EXP_MONTH'].apply(tf)
#vstoxx_futures.tail()
vstoxx_options = pd.read_excel('./vstoxx_march_2014.xlsx', 'vstoxx_options')
#vstoxx_options.info()
del vstoxx_options['A_SETTLEMENT_PRICE_SCALED']
del vstoxx_options['A_PRODUCT_ID']
columns = ['DATE', 'EXP_YEAR', 'EXP_MONTH', 'TYPE', 'STRIKE', 'PRICE']
vstoxx_options.columns = columns
vstoxx_options['MATURITY'] = vstoxx_options['EXP_MONTH'].apply(tf)
#vstoxx_options.head()
vstoxx_options['STRIKE'] = vstoxx_options['STRIKE'] / 100.0
save = False
if save is True:
import warnings
warnings.simplefilter('ignore')
h5 = pd.HDFStore('./vstoxx_march_2014.h5', complevel=9, complib='blosc')
h5['vstoxx_index'] = vstoxx_index
h5['vstoxx_futures'] = vstoxx_futures
h5['vstoxx_options'] = vstoxx_options
h5.close()
pricing_date = dt.datetime(2014, 3, 31)
# last trading day in March 2014
maturity = third_fridays[10]
# October maturity
initial_value = vstoxx_index['V2TX'][pricing_date]
# VSTOXX on pricing_date
forward = vstoxx_futures[(vstoxx_futures.DATE == pricing_date) & (vstoxx_futures.MATURITY == maturity)]['PRICE'].values[0]
tol = 0.20
option_selection = vstoxx_options[(vstoxx_options.DATE == pricing_date)
& (vstoxx_options.MATURITY == maturity)
& (vstoxx_options.TYPE == 'C')
& (vstoxx_options.STRIKE > (1 - tol) * forward)
& (vstoxx_options.STRIKE < (1 + tol) * forward)]
me_vstoxx = market_environment('me_vstoxx', pricing_date)
me_vstoxx.add_constant('initial_value', initial_value)
me_vstoxx.add_constant('final_date', maturity)
me_vstoxx.add_constant('currency', 'EUR')
me_vstoxx.add_constant('frequency', 'B')
me_vstoxx.add_constant('paths', 10000)
csr = constant_short_rate('csr', 0.01)
# somewhat arbitrarily chosen here
me_vstoxx.add_curve('discount_curve', csr)
# parameters to be calibrated later
me_vstoxx.add_constant('kappa', 1.0)
me_vstoxx.add_constant('theta', 1.2 * initial_value)
vol_est = vstoxx_index['V2TX'].std() * np.sqrt(len(vstoxx_index['V2TX']) / 252.0)
me_vstoxx.add_constant('volatility', vol_est)
# vol_est
vstoxx_model = square_root_diffusion('vstoxx_model', me_vstoxx)
me_vstoxx.add_constant('strike', forward)
me_vstoxx.add_constant('maturity', maturity)
payoff_func = 'np.maximum(maturity_value - strike, 0)'
vstoxx_eur_call = valuation_mcs_european('vstoxx_eur_call',vstoxx_model, me_vstoxx, payoff_func)
option_models = {}
for option in option_selection.index:
strike = option_selection['STRIKE'].ix[option]
me_vstoxx.add_constant('strike', strike)
option_models[option] = valuation_mcs_european( 'eur_call_%d' % strike, vstoxx_model, me_vstoxx, payoff_func )
def calculate_model_values(p0):
'''
Returns all relevant option values.
Parameters
p0 : tuple/list, tuple of kappa, theta, volatility
Returns
model_values : dict, dictionary with model values
'''
kappa, theta, volatility = p0
vstoxx_model.update(kappa=kappa,
theta=theta,
volatility=volatility)
model_values = {}
for option in option_models:
model_values[option] = option_models[option].present_value(fixed_seed=True)
return model_values
# calculate_model_values((0.5, 27.5, vol_est))
i = 0
def mean_squared_error(p0):
'''
Returns the mean-squared error given the model and market values.
Parameters
p0 : tuple/list, tuple of kappa, theta, volatility
Returns
MSE : float, mean-squared error
'''
global i
model_values = np.array(calculate_model_values(p0).values())
market_values = option_selection['PRICE'].values
option_diffs = model_values - market_values
MSE = np.sum(option_diffs ** 2) / len(option_diffs)
# vectorized MSE calculation
if i % 20 == 0:
if i == 0:
print( '%4s' % i, '%6s' % "kappa", '%6s' % "theta", '%6s —>' % "vola", '%6s' % "MSE")
print( '%4d' % i, '%6.3f' % p0[0], '%6.3f' % p0[1], '%6.3f —>' % p0[2], '%6.3f' % MSE )
i += 1
return MSE
mean_squared_error((0.5, 27.5, vol_est))

Related

Trying to plot a rolling corr line chart but Matplot keeps saying to bring in only valid columns?

Im trying to create a rolling corr using matplot but I get the error "select only valid columns before calling the operation. Dropped columns were Index(['time'], dtype='object')
I have dropped that field from my data frame but the error keeps on appearing ?
Is it something to do with my .iloc argument?
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import requests
import seaborn as sns
import scipy.stats as stats
import json
from datetime import timezone
from datetime import datetime
from pycoingecko import CoinGeckoAPI
pd.options.display.width = 0
def datetime_to_unix(year, month, day):
'''datetime_to_unix(2021, 6, 1) => 1622505600.0'''
dt = datetime(year, month, day)
timestamp = (dt - datetime(1970, 1, 1)).total_seconds()
return timestamp
def unix_to_datetime(unix_time):
'''unix_to_datetime(1622505700)=> ''2021-06-01 12:01am'''''
ts = int(unix_time/1000 if len(str(unix_time)) > 10 else unix_time) # /1000 handles milliseconds
return datetime.utcfromtimestamp(ts).strftime('%Y-%m-%d %l:%M%p').lower()
# Initialize the client
cg = CoinGeckoAPI()
# Retrieve looksrare data in USD
result = cg.get_coin_market_chart_range_by_id(
id='looksrare',
vs_currency='usd',
from_timestamp=datetime_to_unix(2022, 1, 11),
to_timestamp=datetime_to_unix(2022, 4, 20)
)
time = [ unix_to_datetime(i[0]) for i in result['prices'] ]
p_array = np.array(result['prices'])
price = p_array[:,1]
v_array = np.array(result['total_volumes'])
volume = v_array[:,1]
df = pd.DataFrame({'time':time, 'price':price,})
df.head(100)
# Retrieve ETH data in USD
result = cg.get_coin_market_chart_range_by_id(
id='ethereum',
vs_currency='usd',
from_timestamp=datetime_to_unix(2022, 1, 11),
to_timestamp=datetime_to_unix(2022, 4, 20)
)
time = [ unix_to_datetime(i[0]) for i in result['prices'] ]
p_array = np.array(result['prices'])
price = p_array[:,1]
v_array = np.array(result['total_volumes'])
volume = v_array[:,1]
df2 = pd.DataFrame({'time':time, 'price':price,})
df2.head(100)
df_cd = pd.merge(df, df2, how='inner', on='time')
df_cd = df_cd.drop('time', 1)
output = df_cd.corr()
output1 = df_cd['price_x'].corr(df_cd['price_y'])
overall_pearson_r = df_cd.corr().iloc[0,1]
print(df_cd)
print(f"Pandas computed Pearson r: {overall_pearson_r}")
r, p = stats.pearsonr(df_cd.dropna()['price_x'], df_cd.dropna()['price_y'])
print(f"Scipy computed Pearson r: {r} and p-value: {p}")
# compute rolling window synchrony
f,ax=plt.subplots(figsize=(7,3))
df.rolling(window=30,center=True).median().plot(ax=ax)
ax.set(xlabel='Time',ylabel='Pearson r')
ax.set(title=f"Overall Pearson r = {np.round(overall_pearson_r,2)}");

Daily data of same month over years

I have data from the same month over period of time and I trying to plot the mean by day of the motnh but I don´t know how to do it.
This is how the dataframe looks like
The main code to get the dataframe:
import requests
import pandas as pd
from bs4 import BeautifulSoup as bs
import matplotlib.pyplot as plt
from datetime import date, timedelta
from datetime import datetime
inicio = date(1973, 1, 1)
#inicio = date(2019, 2, 15)
#final = date(2000, 10, 10)
final = date(1974, 3, 1)
delta = timedelta(days=1)
años=[]
links=[]
while inicio <= final:
fechas=inicio.strftime("%Y-%m-%d")
#años.append(datetime.strptime(fechas, '%Y-%m-%d').date())
años.append(fechas)
url='http://weather.uwyo.edu/cgi-bin/sounding?region=samer&TYPE=TEXT%3ALIST&YEAR={}&MONTH={}&FROM={}12&TO={}12&STNM=80222'.format(fechas[0:4],fechas[5:7],fechas[8:10],fechas[8:10])
links.append(url)
inicio += delta
d = dict(zip(años, links))
df1=pd.DataFrame(list(d.items()), columns=['Fecha', 'url'])
df1.set_index('Fecha', inplace=True)
Enero=pd.DataFrame()
Febrero=pd.DataFrame()
for i in df1.index:
if i[5:7]=='01':
Enero = Enero.append(df1.loc[i], ignore_index=False)
elif i[5:7]=='02':
Febrero = Febrero.append(df1.loc[i], ignore_index=False)
labels = ['PRES', 'HGHT', 'TEMP', 'DWPT', 'RELH', 'MIXR', 'DRCT', 'SKNT', 'THTA', 'THTE', 'THTV']
def reques(url):
try:
results = []
peticion=requests.get(url)
soup=bs(peticion.content, 'lxml')
pre = (soup.select_one('pre')).text
for line in pre.split('\n')[4:-1]:
#print (line)
if '--' not in line:
row = [line[i:i+7].strip() for i in range(0, len(line), 7)]
results.append(row)
else:
pass
df5=pd.DataFrame.from_records(results, columns=labels)
#return x
return df5
except AttributeError:
pass
SuperDF = pd.DataFrame()
SuperDF = pd.DataFrame(columns=labels)
startTime = datetime.now()
sin_datos=[]
for i in Febrero['url']:
try:
x=reques(i)
df2=x
y=str(df1[df1['url']==i].index.values)
df2.index = [y] * len(x)
SuperDF=SuperDF.append(x)
except TypeError:
sin_datos.append(df1[df1['url']==i].index.values)
print (df1[df1['url']==i].index.values)
SuperDF.index= SuperDF.index.map(lambda x: x.lstrip("['").rstrip("]''"))
SuperDF.index = pd.to_datetime(SuperDF.index)
SuperDF=SuperDF.apply(pd.to_numeric)
SuperDF
I've been trying to do it whit this
import seaborn as sns
SuperDF = SuperDF[(SuperDF['TEMP']==0)]
ax = SuperDF.loc['02', 'RELH'].plot(marker='o', linestyle='-')
ax.set_ylabel('RELH');
but I got this error
KeyError: '02'
It works when i pass the year but i need the mean by day for the month. Any help will be appreciate.
This is what I need

Use of datetime timedelta with numpy 3d array

I have a 3D array with the count of number of days past a benchmark date (e.g., 01.01.2000). I am interested in the actual day-of-year (DOY: 1-365/366)rather than the total number of days past a given date.
For a single value, the below syntax works. For e.g.,
import numpy as np
import datetime
data = 1595
date = datetime.datetime(2000,1,1,0,0) + datetime.timedelta(data -1)
date.timetuple().tm_yday
134
However, I am having issues with using a 3D array.
import numpy as np
import datetime
data = np.random.randint(5, size = (2,2,2))
data = data + 1595
data
array([[[1596, 1595],
[1599, 1599]],
[[1596, 1599],
[1595, 1595]]])
#Function
def Int_to_DOY(int_array):
date_ = datetime.datetime(2000,1,1,0,0) + datetime.timedelta(int_array - 1)
return date_.timetuple().tm_yday
doy_data = data * 0 #Empty array
for i in range(2):
doy_data[:, :, i] = Int_to_DOY(data[:, :, i])
Here is the error message and I am not able to figure this out.
TypeError: unsupported type for timedelta days component: numpy.ndarray
Thanks for your help.
import numpy as np
import datetime
data = np.random.randint(5, size = (2,2,2))
data = data + 1595
#Function
def Int_to_DOY(int_array):
date_ = datetime.datetime(2000,1,1,0,0) + datetime.timedelta(int(int_array) -1)
return date_.timetuple().tm_yday
doy_data = data.flatten()
for i in range(len(doy_data)):
doy_data[i] = Int_to_DOY(doy_data[i])
doy_data = doy_data.reshape((2,2,2))
Since you tagged pandas:
data = np.array([[[1596, 1595],
[1599, 1599]],
[[1596, 1599],
[1595, 1595]]])
s = pd.to_datetime('2000-01-01') + pd.to_timedelta(data.ravel(), unit='D')
s.dayofyear.values.reshape(data.shape) - 1
Output:
array([[[135, 134],
[138, 138]],
[[135, 138],
[134, 134]]], dtype=int64)

Goodness of fit always being zero despite taking random data?

I'm trying to write code that generates random data and computes goodness of fit but I'm not understanding why the chi-squared test is always zero, may I have a fix for this ? For an attempted fix I tried playing around with different types to see if I get any resulting changes in the initial output, also I've tried changing the parameters to the loop in question.
from scipy import stats
import math
import random
import numpy
import scipy
import numpy as np
def Linear_Chi2_Generate(observed_values = [], expected_values = []):
#===============================================================#
# !!!!!!! Generation of Data !!!!!!!!!! #
#===============================================================#
for i in range(0,12):
a = random.randint(-10,10)
b = random.randint(-10,10)
y = a * (b + i)
observed_values.append(y)
#######################################################################################
# !!! Array Setup !!!! #
# ***Had the Array types converted to floats before computing Chi2*** #
# #
#######################################################################################
t_s = 0
o_v = np.array(observed_values)
e_v = np.array(expected_values)
o_v_f = o_v.astype(float)
e_v_f = o_v.astype(float)
z_o_e_v_f = zip(o_v.astype(float), e_v.astype(float))
######################################################################################
for i in z_o_e_v_f:
t_s += [((o_v_f)-(e_v_f))]**2/(e_v_f) # Computs the Chi2 Stat !
######################################################################################
print("Observed Values ", o_v_f)
print("Expected Values" , e_v_f)
df=len(o_v_f)-1
print("Our goodness of fit for our linear function", stats.chi2.cdf(t_s,df))
return t_s
Linear_Chi2_Generate()
In your original code, e_v_f = o_v.astype(float) made o_v_f, e_v_f ending up the same. There was also some issue in the for loop. I have edited your code a bit. See what it does you are looking for:
from scipy import stats
import math
import random
import numpy
import scipy
import numpy as np
def Linear_Chi2_Generate(observed_values = [], expected_values = []):
#===============================================================#
# !!!!!!! Generation of Data !!!!!!!!!! #
#===============================================================#
for i in range(0,12):
a_o = random.randint(-10,10)
b_o = random.randint(-10,10)
y_o = a_o * (b_o + i)
observed_values.append(y_o)
# a_e = random.randint(-10,10)
# b_e = random.randint(-10,10)
# y_e = a_e * (b_e + i)
expected_values.append(y_o + 5)
#######################################################################################
# !!! Array Setup !!!! #
# ***Had the Array types converted to floats before computing Chi2*** #
# #
#######################################################################################
t_s = 0
o_v = np.array(observed_values)
e_v = np.array(expected_values)
o_v_f = o_v.astype(float)
e_v_f = e_v.astype(float)
z_o_e_v_f = zip(o_v.astype(float), e_v.astype(float))
######################################################################################
for o, e in z_o_e_v_f:
t_s += (o - e) **2 / e # Computs the Chi2 Stat !
######################################################################################
print("Observed Values ", o_v_f)
print("Expected Values" , e_v_f)
df=len(o_v_f)-1
print("Our goodness of fit for our linear function", stats.chi2.cdf(t_s,df))
return t_s
Linear_Chi2_Generate()

Estimating parameters using minimization in Python and speed up this process

I am trying to find parameter estimates using using minimization. The code I wrote works but there are two problems:
I finds only a local minimum. I tried to solve this by using basinhopping.
It takes very long until I get a result and since I have to do this minimization around 1000 times this becomes a big issue.
So my questions are:
Do you know how I could optimize my code so that it runs faster for the minimization.
Is there a way I can change the basinhopping part so that it runs faster? eg. set niter lower or a differnt method im not aware of. I tried running it like this and after 10 hour I didnt get a response for even one of the 1000 individuals for basinhopping.
Is there another way to find a global minimum?
Feel free to ask further questions please.
My code:
import numpy as np
from scipy.optimize import minimize
from scipy.optimize import basinhopping
from scipy.integrate import odeint
import pickle
import os
import pandas as pd
import datetime
import numpy.random as npr
import csv
path = "C:\\Users\Sebastian Gäumann\OneDrive\Dokumente\FS 2017\Bachelorarbeit\Python"
os.chdir(path)
###IDS
df = pd.read_csv('1_Youtuber_SingleNrSheet_Comedy.csv', sep = ";", skipinitialspace=True) ######Change Name
YoutuberID = df["Channel_ID"].tolist()
##print(YoutuberID)
with open("9_p_q_m_Fun_ExtendedBass_VIEWS_Comedy_test.csv", "w" ,newline='',encoding='utf-8') as csv_file2: ######Change Name
csv_writer2 = csv.writer(csv_file2, delimiter=';')
csv_writer2.writerow(["Type","p", "q", "m","Functionvalue"])
count = 0
for ID in YoutuberID[0:]: ###Change
try:
path = "C:\\Users\Sebastian Gäumann\OneDrive\Dokumente\FS 2017\Bachelorarbeit\Python"
os.chdir(path)
###ALL INFO
Days = pd.read_csv('3_API_Call_ALL_info_Comedy_v2.csv', sep = ";", skipinitialspace=True)
views_path = "C:\\Users\Sebastian Gäumann\OneDrive\Dokumente\FS 2017\Bachelorarbeit\Python\Daily_Views_Comedy" ######Change Name
os.chdir(views_path)
SVR = pd.read_csv("4_COMEDY_DailyViews_Clean_" + str(count) + "_" + ID + ".csv", sep = ";", parse_dates=True, dayfirst=True) ######Change Name
## print(SVR[SVR.columns[0]])
SVR = SVR[SVR[SVR.columns[0]]< "2018-05-01"] ####CHANGE DATE FOR DIF CAT
## print(SVR)
#####SV Input
SV = np.array(SVR["Daily Views"])
## print(SV)
Days = Days[Days["channelId"] == ID]
## print(Days)
Days["publishedAt"] = pd.to_datetime(Days.publishedAt)
Days = Days[Days["publishedAt"] > "2015-01-08"] ##"2015-01-10"
## print(Days)
##### Timedelta #####
start_date = pd.to_datetime("2015-06-08")
##print(start_date)
video_upload_day =[]
for video_date in Days["publishedAt"]:
TimeDelta = video_date - start_date
video_upload_day.append(TimeDelta.days)
##print(video_upload_day)
##print(videoT)
nvideos = len(video_upload_day)
ndays = len(SV)
videoT = np.array(video_upload_day)
## print(videoT,nvideos,ndays)
def objective(x):
p = x[0]
q = x[1]
m = x[2]
estimateV = np.zeros( (ndays, nvideos) )
for t in range( ndays ):
for v in range( nvideos ):
if videoT[v] <= t:
estimateV[ t,v ] = p*m + (q-p) * np.sum(estimateV[0:t,v],axis=0) - (q/m) * (np.sum(estimateV[0:t,v],axis=0)**2)
estimateSV = np.sum( estimateV, axis = 1 )
return np.sum( (SV - estimateSV)**2 )
This is the minimization part. I made one for the normal minimization and one for basinhopping and seperated it with ##.
###### MINIMIZATION #######
mguess = round(sum(SV)/(nvideos*2),0)
print(sum(SV),mguess)
x0 = np.array([0.001, 0.01, mguess]) ####Make it less volatile to first guess? Make bigger steps for m?
b1 = (0.00001,0.5)
b2 = (10**4,10**7)
bnds = (b1,b1,b2)
## minimizer_kwargs = dict(method="L-BFGS-B",bounds=bnds)
## res = basinhopping(objective, x0,niter=20, minimizer_kwargs=minimizer_kwargs)
res = minimize(objective, x0,bounds = bnds)
print(res)
csv_writer2.writerow(["COMEDY",res.x[0], res.x[1],res.x[2],res.fun]) ###CHANNGE CAT
print("CURRERNT YOUTUBER IS:",count)
count += 1
except:
print("PROBLEM",count)
count += 1
## print(res,res.x[0],res.x[1],res.x[2],res.fun)

Resources