Subtract two columns using conditions - python-3.x

I have two date columns and I want to subtract them based on conditions: first check the first column for blanks, then check the second column for blanks, and finally check whether the difference between the dates is less than one day. If any of these conditions holds, return null; otherwise carry out the subtraction of the two columns. Something like this:
'''if [Recommendation signed] = null or [Executed Date] = null or Duration.Days([Contract Executed Date]-[Recommendation signed]) < 1 then null else Duration.Days([Contract Executed Date]-[Recommendation signed])'''

You can do that using the apply function. For example, say you want to store the result in a new column called day difference.
Make sure these are datetime columns first (if they're not, apply the to_datetime function).
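For instance, with a small made-up frame using the question's column names (the dates here are purely hypothetical):
import pandas as pd

# Hypothetical sample data with the question's column names
df = pd.DataFrame({
    'Recommendation signed': ['2020-01-01', None, '2020-01-05'],
    'Executed Date': ['2020-01-10', '2020-01-02', '2020-01-05'],
    'Contract Executed Date': ['2020-01-10', '2020-01-02', '2020-01-05'],
})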
df['Recommendation signed'] = pd.to_datetime(df['Recommendation signed']).dt.date
df['Executed Date'] = pd.to_datetime(df['Executed Date']).dt.date
df['Contract Executed Date'] = pd.to_datetime(df['Contract Executed Date']).dt.date

def subtract_columns(row):
    # Null in either input column, or a gap of less than one day, yields null
    if (pd.isnull(row['Recommendation signed'])
            or pd.isnull(row['Executed Date'])
            or (row['Contract Executed Date'] - row['Recommendation signed']) < pd.Timedelta(days=1)):
        return None
    return row['Contract Executed Date'] - row['Recommendation signed']

df['day difference'] = df.apply(subtract_columns, axis=1)
Hope this helps.

Here's one way to do it. Since no data was provided I created my own generator. The solution is contained within find_duration and how it is used in df.apply(find_duration, axis=1).
from datetime import datetime, timedelta
from itertools import islice
import numpy as np
import pandas as pd
RECOMMENDATION_IS_PENDING = "RECOMMENDATION_IS_PENDING"
EXECUTION_IS_PENDING = "EXECUTION_IS_PENDING"
COMPLETED_IN_LESS_THAN_ONE_DAY = "COMPLETED_IN_LESS_THAN_ONE_DAY"
COMPLETED_IN_MORE_THAN_ONE_DAY = "COMPLETED_IN_MORE_THAN_ONE_DAY"
MIN_YEAR = 1900
MAX_YEAR = 2020
NUM_YEARS = MAX_YEAR - MIN_YEAR + 1
START_DATE = datetime(MIN_YEAR, 1, 1, 00, 00, 00)
END_DATE = START_DATE + timedelta(days=365 * NUM_YEARS)
NUM_RECORDS = 20
def random_datetime(rng, dt):
    return START_DATE + (END_DATE - START_DATE) * rng.uniform()

def less_than_one_day(rng, dt):
    hours = int(np.round(23.0 * rng.uniform()))
    return dt + timedelta(hours=hours)

def more_than_one_day(rng, dt):
    days = 1 + int(np.round(100.0 * rng.uniform()))
    return dt + timedelta(days=days)

def null_datetime(rng, dt):
    return None

class RecordGenerator:
    PROBABILITIES = {
        RECOMMENDATION_IS_PENDING: 0.1,
        EXECUTION_IS_PENDING: 0.2,
        COMPLETED_IN_LESS_THAN_ONE_DAY: 0.2,
        COMPLETED_IN_MORE_THAN_ONE_DAY: 0.5,
    }
    GENERATORS = {
        RECOMMENDATION_IS_PENDING: (null_datetime, random_datetime),
        EXECUTION_IS_PENDING: (random_datetime, null_datetime),
        COMPLETED_IN_LESS_THAN_ONE_DAY: (random_datetime, less_than_one_day),
        COMPLETED_IN_MORE_THAN_ONE_DAY: (random_datetime, more_than_one_day),
    }

    def __init__(self, seed=0):
        self.rng = np.random.RandomState(seed)

    def __iter__(self):
        while True:
            res = self.rng.uniform()
            for kind, val in self.PROBABILITIES.items():
                res -= val
                if res <= 0.0:
                    break
            recommendation_signed_fn, execution_date_fn = self.GENERATORS[kind]
            recommendation_signed = recommendation_signed_fn(self.rng, None)
            execution_date = execution_date_fn(self.rng, recommendation_signed)
            yield recommendation_signed, execution_date

def find_duration(df):
    duration = df["execution_date"] - df["recommendation_signed"]
    if duration is pd.NaT or duration < pd.Timedelta(days=1):
        return None
    return duration

if __name__ == "__main__":
    records = RecordGenerator()
    recommendation_signed_dates, execution_dates = zip(*islice(records, NUM_RECORDS))
    df = pd.DataFrame.from_dict({
        "recommendation_signed": recommendation_signed_dates,
        "execution_date": execution_dates,
    })
    print(f"`recommendation_signed` is null: [{df['recommendation_signed'].isnull().sum()}]")
    print(f"`execution_date` is null: [{df['execution_date'].isnull().sum()}]")
    print(f"`completed_in_less_than_one_day`: [{((df['execution_date'] - df['recommendation_signed']) < pd.Timedelta(days=1)).sum()}]")
    print(f"`completed_in_more_than_one_day`: [{((df['execution_date'] - df['recommendation_signed']) >= pd.Timedelta(days=1)).sum()}]")
    df["completion_time"] = df.apply(find_duration, axis=1)
    print(df)
Output:
`recommendation_signed` is null: [2]
`execution_date` is null: [2]
`completed_in_less_than_one_day`: [4]
`completed_in_more_than_one_day`: [12]
recommendation_signed execution_date completion_time
0 1986-06-25 08:07:14.808395 1986-08-25 08:07:14.808395 61 days
1 1951-03-25 17:08:27.986156 1951-05-30 17:08:27.986156 66 days
2 2007-11-01 03:42:35.672304 2007-11-02 01:42:35.672304 NaT
3 1995-09-26 12:52:16.917964 1995-09-27 00:52:16.917964 NaT
4 2011-12-03 23:24:45.808880 2011-12-11 23:24:45.808880 8 days
5 NaT 1902-06-12 22:41:33.183052 NaT
6 1994-02-04 07:01:47.052493 1994-05-03 07:01:47.052493 88 days
7 1996-08-19 20:06:42.217770 1996-10-05 20:06:42.217770 47 days
8 1914-04-21 14:09:37.598524 1914-06-25 14:09:37.598524 65 days
9 2014-03-25 07:15:55.137157 NaT NaT
10 1950-02-21 13:04:11.684479 1950-03-20 13:04:11.684479 27 days
11 1955-02-27 21:06:22.090510 1955-04-26 21:06:22.090510 58 days
12 NaT 1974-09-07 20:55:17.329968 NaT
13 1974-08-07 21:21:33.578522 1974-11-10 21:21:33.578522 95 days
14 1943-06-22 15:59:39.451885 1943-08-06 15:59:39.451885 45 days
15 1907-04-14 20:35:27.269379 1907-06-21 20:35:27.269379 68 days
16 1925-06-10 13:05:57.968982 1925-06-24 13:05:57.968982 14 days
17 1943-12-25 06:52:07.566032 1943-12-25 19:52:07.566032 NaT
18 2019-07-07 12:44:00.201327 2019-07-07 14:44:00.201327 NaT
19 1919-07-05 05:38:11.678570 NaT NaT

You could try something like this:
import numpy as np
import pandas as pd
from datetime import timedelta

df['Recommendation Signed'] = pd.to_datetime(df['Recommendation Signed'], errors='coerce')
df['Contract Executed Date'] = pd.to_datetime(df['Contract Executed Date'], errors='coerce')
df['date_difference'] = np.where(
    df['Recommendation Signed'].isnull()
    | df['Contract Executed Date'].isnull()
    | ((df['Contract Executed Date'] - df['Recommendation Signed']) < timedelta(days=1)),
    np.timedelta64('NaT'),
    df['Contract Executed Date'] - df['Recommendation Signed'])
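The resulting column holds timedeltas; if you want an integer day count like the question's Duration.Days, the .dt.days accessor converts it (NaT becomes NaN):
# timedelta64 column -> whole days as a number (NaT -> NaN)
df['date_difference'] = df['date_difference'].dt.days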

Related

Python, Pandas - count values based on multiple criteria in row and multiple columns

import pandas as pd
import datetime as dt

df = pd.DataFrame({"Sales": [1000, 2000, 3000, 4000, 5000],
                   "Dates": pd.date_range(dt.date.today(), periods=5).astype(str)})
myDate = "2020-01-12"

def count_Commission(row):
    if (row > 3000 or df.Dates < myDate):
        return row * 0.1
    else:
        return 0

df['Commission'] = df.Sales.apply(count_Commission)
print(df)
I want to calculate commission based on criteria in Sales (value > 3000) and Dates (for dates earlier than myDate). I would love to see solutions with and without a lambda, both as a separate function and as simple code (without a dedicated def function).
With a lambda:
df['Commission'] = df.apply(lambda row: row['Sales'] * 0.1 if (row['Sales'] > 3000 or row['Dates'] < myDate) else 0, axis=1)
With a "dedicated function":
def calculate_commission(row):
    return row['Sales'] * 0.1 if (row['Sales'] > 3000 or row['Dates'] < myDate) else 0

df['Commission'] = df.apply(calculate_commission, axis=1)
Vectorized (fastest):
df['Commission'] = np.where((df['Sales'] > 3000) | (df['Dates'] < myDate), df['Sales'] * 0.1, 0)
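To check the "fastest" claim on your own data, a quick timeit comparison is easy to run (a sketch; absolute numbers will vary with frame size):
import timeit
import numpy as np

apply_time = timeit.timeit(
    lambda: df.apply(lambda row: row['Sales'] * 0.1
                     if (row['Sales'] > 3000 or row['Dates'] < myDate) else 0, axis=1),
    number=100)
where_time = timeit.timeit(
    lambda: np.where((df['Sales'] > 3000) | (df['Dates'] < myDate), df['Sales'] * 0.1, 0),
    number=100)
print(f"apply: {apply_time:.4f}s, np.where: {where_time:.4f}s (100 runs each)")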
Try:
import numpy as np
df['Commission'] = np.where((df.Dates<myDate) | (df.Sales>3000), df.Sales*0.1, 0)
As an alternative you can use the loc[...] method:
df['Commission']=0
df.loc[(df.Dates<myDate) | (df.Sales>3000), 'Commission'] = df.Sales*0.1
Output:
Sales Dates Commission
0 1000 2020-01-12 0.0
1 2000 2020-01-13 0.0
2 3000 2020-01-14 0.0
3 4000 2020-01-15 400.0
4 5000 2020-01-16 500.0

How to transform a dataframe based on if,else conditions?

I am trying to build a function which transforms a dataframe based on certain conditions, but I am getting a Syntax Error. I am not sure what I am doing wrong. Any help will be appreciated. Thank you!
import pandas as pd
from datetime import datetime
from datetime import timedelta
df=pd.read_csv('example1.csv')
df.columns =(['dtime','kW'])
df['dtime'] = pd.to_datetime(df['dtime'])
df.head(5)
dtime kW
0 2019-08-27 23:30:00 0.016
1 2019-08-27 23:00:00 0
2 2019-08-27 22:30:00 0.016
3 2019-08-27 22:00:00 0.016
4 2019-08-27 21:30:00 0
def transdf(df):
    a = df.loc[0, 'dtime']
    b = df.loc[1, 'dtime']
    c = a - b
    minutes = c.total_seconds() / 60
    d = int(minutes)
    # d can only be 15, 30 or 60
    if d == 15:
        return df=df.set_index('dtime').asfreq('-15T', fill_value='Missing')
    elif d == 30:
        return df=df.set_index('dtime').asfreq('-30T', fill_value='Missing')
    elif d == 60:
        return df=df.set_index('dtime').asfreq('-60T', fill_value='Missing')
    else:
        return None
First: the syntax error comes from return df=..., because Python does not allow an assignment inside a return statement. Inside each of the cases just update the value of df, and have a single return after the if/elif/else at the end of your function.
def transform(df):
    a = df.loc[0, 'dtime']
    b = df.loc[1, 'dtime']
    c = a - b
    minutes = c.total_seconds() / 60
    d = int(minutes)
    # d can only be 15, 30 or 60
    if d == 15:
        df = df.set_index('dtime').asfreq('-15T', fill_value='Missing')
    elif d == 30:
        df = df.set_index('dtime').asfreq('-30T', fill_value='Missing')
    elif d == 60:
        df = df.set_index('dtime').asfreq('-60T', fill_value='Missing')
    else:
        df = None
    return df
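Usage then looks like this (assuming the same example1.csv as in the question):
df = pd.read_csv('example1.csv')
df.columns = ['dtime', 'kW']
df['dtime'] = pd.to_datetime(df['dtime'])
result = transform(df)  # reindexed frame, or None for an unexpected interval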

How to have a chart multiple columns continuously by iterating through a data-frame with matplotlib

BACKGROUND INFORMATION:
I have a dataframe of x stocks with y price sets each (currently x is 5 and y is 2: the closing price and a 3 day Simple Moving Average, SMA).
The current output is [2781 rows x 10 columns], with the data ranging from start_date = '2006-01-01' to end_date = '2016-12-31'. print(df) gives the following dataframe:
CURRENT OUTPUT:
ANZ Price ANZ 3 day SMA CBA Price CBA 3 day SMA MQG Price MQG 3 day SMA NAB Price NAB 3 day SMA WBC Price WBC 3 day SMA
Date
2006-01-02 23.910000 NaN 42.569401 NaN 66.558502 NaN 30.792999 NaN 22.566401 NaN
2006-01-03 24.040001 NaN 42.619099 NaN 66.086403 NaN 30.935699 NaN 22.705400 NaN
2006-01-04 24.180000 24.043334 42.738400 42.642300 66.587997 66.410967 31.078400 30.935699 22.784901 22.685567
2006-01-05 24.219999 24.146667 42.708599 42.688699 66.558502 66.410967 30.964300 30.992800 22.794800 22.761700
... ... ... ... ... ... ... ... ... ... ...
2016-12-27 87.346667 30.670000 30.706666 32.869999 32.729999 87.346667 30.670000 30.706666 32.869999 32.729999
2016-12-28 87.456667 31.000000 30.773333 32.980000 32.829999 87.456667 31.000000 30.773333 32.980000 32.829999
2016-12-29 87.520002 30.670000 30.780000 32.599998 32.816666 87.520002 30.670000 30.780000 32.599998 32.816666
MY WORKING CODE:
#!/usr/bin/python3
from pandas_datareader import data
import pandas as pd
import itertools as it
import os
import numpy as np
import fix_yahoo_finance as yf
import matplotlib.pyplot as plt

yf.pdr_override()
stock_list = sorted(["ANZ.AX", "WBC.AX", "MQG.AX", "CBA.AX", "NAB.AX"])
number_of_decimal_places = 8
moving_average_period = 3

def get_moving_average(df, stock_name):
    df2 = df.rolling(window=moving_average_period).mean()
    df2.rename(columns={stock_name: stock_name.replace("Price", str(moving_average_period) + " day SMA")}, inplace=True)
    df = pd.concat([df, df2], axis=1, join_axes=[df.index])
    return df

# Function to get the closing price of the individual stocks
# from the stock_list list
def get_closing_price(stock_name, specific_close):
    symbol = stock_name
    start_date = '2006-01-01'
    end_date = '2016-12-31'
    df = data.get_data_yahoo(symbol, start_date, end_date)
    sym = symbol + " "
    print(sym * 10)
    df = df.drop(['Open', 'High', 'Low', 'Adj Close', 'Volume'], axis=1)
    df = df.rename(columns={'Close': specific_close})
    # https://stackoverflow.com/questions/16729483/converting-strings-to-floats-in-a-dataframe
    # df[specific_close] = df[specific_close].astype('float64')
    # print(type(df[specific_close]))
    return df

# Creates a big DataFrame with all the stocks' closing
# prices and returns the DataFrame
def get_all_close_prices(directory):
    count = 0
    for stock_name in stock_list:
        specific_close = stock_name.replace(".AX", "") + " Price"
        if not count:
            prev_df = get_closing_price(stock_name, specific_close)
            prev_df = get_moving_average(prev_df, specific_close)
        else:
            new_df = get_closing_price(stock_name, specific_close)
            new_df = get_moving_average(new_df, specific_close)
            # https://stackoverflow.com/questions/11637384/pandas-join-merge-concat-two-dataframes
            prev_df = prev_df.join(new_df)
        count += 1
    # prev_df.to_csv(directory)
    df = pd.DataFrame(prev_df, columns=list(prev_df))
    df = df.apply(pd.to_numeric)
    convert_df_to_csv(df, directory)
    return df

def convert_df_to_csv(df, directory):
    df.to_csv(directory)

def main():
    # FINDS THE CURRENT DIRECTORY AND CREATES THE CSV TO DUMP THE DF
    csv_in_current_directory = os.getcwd() + "/stock_output.csv"
    csv_in_current_directory_dow_distribution = os.getcwd() + "/dow_distribution.csv"
    # FUNCTION THAT GETS ALL THE CLOSING PRICES OF THE STOCKS
    # AND RETURNS IT AS ONE COMPLETE DATAFRAME
    df = get_all_close_prices(csv_in_current_directory)
    print(df)

# Main line of code
if __name__ == "__main__":
    main()
QUESTION:
From this df I want to create x line graphs (one graph per stock), each with y lines (the price and its SMAs). How can I do this with matplotlib? Could this be done with a for loop, saving the individual plots as the loop iterates? If so, how?
First, import matplotlib.pyplot as plt.
Then it depends whether you want x many individual plots or one plot with x many subplots:
Individual plots
df.plot(y=[0,1])
df.plot(y=[2,3])
df.plot(y=[4,5])
df.plot(y=[6,7])
df.plot(y=[8,9])
plt.show()
You can also save the individual plots in a loop:
for i in range(0, 9, 2):
    df.plot(y=[i, i+1])
    plt.savefig('{}.png'.format(i))
Subplots
fig, axes = plt.subplots(nrows=2, ncols=3)
df.plot(ax=axes[0,0],y=[0,1])
df.plot(ax=axes[0,1],y=[2,3])
df.plot(ax=axes[0,2],y=[4,5])
df.plot(ax=axes[1,0],y=[6,7])
df.plot(ax=axes[1,1],y=[8,9])
plt.show()
See https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.plot.html for options to customize your plot(s).
The best approach is to make a function that adapts to the sizes of your lists x and y. The function could look as follows:
def generate_SMA_graphs(df):
    columnNames = list(df.head(0))
    print("CN:\t", columnNames)
    print(len(columnNames))
    count = 0
    for stock in stock_list:
        stock_iter = count * (len(moving_average_period_list) + 1)
        sma_iter = stock_iter + 1
        for moving_average_period in moving_average_period_list:
            fig = plt.figure()
            df.plot(y=[columnNames[stock_iter], columnNames[sma_iter]])
            plt.xlabel('Time')
            plt.ylabel('Price ($)')
            graph_title = columnNames[stock_iter] + " vs. " + columnNames[sma_iter]
            plt.title(graph_title)
            plt.grid(True)
            plt.savefig(graph_title.replace(" ", "") + ".png")
            print("\t\t\t\tCompleted: ", graph_title)
            plt.close(fig)
            sma_iter += 1
        count += 1
With the code above, however long either list is (the stock list for x or the SMA period list for y), the function will generate a graph comparing the original price with every SMA for that given stock.
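Note that generate_SMA_graphs assumes a moving_average_period_list in scope, while the question's code defines only a single moving_average_period, so a hypothetical setup for calling it would be:
# Hypothetical setup: one SMA period, matching the 10-column frame above
moving_average_period_list = [moving_average_period]  # e.g. [3]
generate_SMA_graphs(df)  # writes one PNG per stock/SMA pair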

how to replace a cell in a pandas dataframe

After forming the below python pandas dataframe (for example)
import pandas
data = [['Alex',10],['Bob',12],['Clarke',13]]
df = pandas.DataFrame(data,columns=['Name','Age'])
If I iterate through it, I get
In [62]: for i in df.itertuples():
...: print( i.Index, i.Name, i.Age )
...:
0 Alex 10
1 Bob 12
2 Clarke 13
What I would like to achieve is to replace the value of a particular cell
In [67]: for i in df.itertuples():
...: if i.Name == "Alex":
...: df.at[i.Index, 'Age'] = 100
...:
Which seems to work
In [64]: df
Out[64]:
Name Age
0 Alex 100
1 Bob 12
2 Clarke 13
The problem is that when using a larger, different dataset, I do the following:
First, I create a new column named NETELEMENT with a default value of ""
Then I would like to replace the default value "" with the string that the function lookup_netelement returns:
df['NETELEMENT'] = ""
for i in df.itertuples():
df.at[i.Index, 'NETELEMENT'] = lookup_netelement(i.PEER_SRC_IP)
print( i, lookup_netelement(i.PEER_SRC_IP) )
But what I get as a result is:
Pandas(Index=769, SRC_AS='', DST_AS='', COMMS='', SRC_COMMS=nan, AS_PATH='', SRC_AS_PATH=nan, PREF='', SRC_PREF='0', MED='0', SRC_MED='0', PEER_SRC_AS='0', PEER_DST_AS='', PEER_SRC_IP='x.x.x.x', PEER_DST_IP='', IN_IFACE='', OUT_IFACE='', PROTOCOL='udp', TOS='0', BPS=35200.0, SRC_PREFIX='', DST_PREFIX='', NETELEMENT='', IN_IFNAME='', OUT_IFNAME='') routerX
meaning that it should be:
NETELEMENT='routerX' instead of NETELEMENT=''
Could you please advise what I am doing wrong?
EDIT: for completeness, lookup_netelement is defined as
def lookup_netelement(ipaddr):
    try:
        x = LOOKUP['conn'].hget('ipaddr;{}'.format(ipaddr), 'dev') or b""
    except Exception as e:
        logger.error('looking up `ipaddr` for netelement caused `{}`'.format(repr(e)), exc_info=True)
        x = b""
    x = x.decode("utf-8")
    return x
It sounds like you are looking for where for conditional replacement, i.e.
def wow(x):
    return x ** 10

df['new'] = df['Age'].where(~(df['Name'] == 'Alex'), wow(df['Age']))
Output:
Name Age new
0 Alex 10 10000000000
1 Bob 12 12
2 Clarke 13 13
3 Alex 15 576650390625
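where keeps the values where the condition is True and substitutes elsewhere; the equivalent with mask, which substitutes where the condition is True, may read more naturally here:
# mask replaces the rows where the condition holds
df['new'] = df['Age'].mask(df['Name'] == 'Alex', wow(df['Age']))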
Based on your edit, you're trying to apply the function, i.e.
df['new'] = df['PEER_SRC_IP'].apply(lookup_netelement)
Edit: for your comment on sending two columns, use a lambda with axis=1, i.e.
def wow(x, y):
    return '{} {}'.format(x, y)

df.apply(lambda x: wow(x['Name'], x['Age']), axis=1)
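As a side note on the original loop: df.at[...] does update the frame. The tuple i printed afterwards is a snapshot taken before the write, which is why it still shows NETELEMENT='' while the lookup returns 'routerX'. Re-reading the cell shows the stored value, as in this sketch:
for i in df.itertuples():
    df.at[i.Index, 'NETELEMENT'] = lookup_netelement(i.PEER_SRC_IP)
    # i was materialised before the assignment, so read back from df instead
    print(df.at[i.Index, 'NETELEMENT'])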

Subtraction between 'dict_values' and 'float'

I am getting the error "TypeError: unsupported operand type(s) for -: 'dict_values' and 'float'" from line 173 in the sample code. I copied it from a book that does not yet seem to be updated for Python 3, and other forum topics don't seem to cover this problem.
It is trying to calculate the error in an optimisation as the difference between market values and model values, but the two are stored as different data types.
Thanks
import numpy as np
import pandas as pd
import datetime as dt
import matplotlib.pyplot as plt
import calendar
# frame
from get_year_deltas import get_year_deltas
from constant_short_rate import constant_short_rate
from market_environment import market_environment
from plot_option_stats import plot_option_stats
# simulation
from sn_random_numbers import sn_random_numbers
from simulation_class import simulation_class
from geometric_brownian_motion import geometric_brownian_motion
from jump_diffusion import jump_diffusion
from square_root_diffusion import square_root_diffusion
# valuation
from valuation_class import valuation_class
from valuation_mcs_european import valuation_mcs_european
from valuation_mcs_american import valuation_mcs_american
from derivatives_position import derivatives_position
from derivatives_portfolio import derivatives_portfolio
#import os
#path = os.getcwd()
url = 'http://www.stoxx.com/download/historical_values/h_vstoxx.txt'
vstoxx_index = pd.read_csv(url, index_col=0, header=2,parse_dates=True, dayfirst=True)
vstoxx_index = vstoxx_index[('2013/12/31' < vstoxx_index.index) & (vstoxx_index.index < '2014/4/1')]
vstoxx_futures = pd.read_excel('./vstoxx_march_2014.xlsx', 'vstoxx_futures')
del vstoxx_futures['A_SETTLEMENT_PRICE_SCALED']
del vstoxx_futures['A_CALL_PUT_FLAG']
del vstoxx_futures['A_EXERCISE_PRICE']
del vstoxx_futures['A_PRODUCT_ID']
columns = ['DATE', 'EXP_YEAR', 'EXP_MONTH', 'PRICE']
vstoxx_futures.columns = columns
def third_friday(date):
    day = 21 - (calendar.weekday(date.year, date.month, 1) + 2) % 7
    return dt.datetime(date.year, date.month, day)
set(vstoxx_futures['EXP_MONTH'])
third_fridays = {}
for month in set(vstoxx_futures['EXP_MONTH']):
    third_fridays[month] = third_friday(dt.datetime(2014, month, 1))
#third_fridays
tf = lambda x: third_fridays[x]
vstoxx_futures['MATURITY'] = vstoxx_futures['EXP_MONTH'].apply(tf)
#vstoxx_futures.tail()
vstoxx_options = pd.read_excel('./vstoxx_march_2014.xlsx', 'vstoxx_options')
#vstoxx_options.info()
del vstoxx_options['A_SETTLEMENT_PRICE_SCALED']
del vstoxx_options['A_PRODUCT_ID']
columns = ['DATE', 'EXP_YEAR', 'EXP_MONTH', 'TYPE', 'STRIKE', 'PRICE']
vstoxx_options.columns = columns
vstoxx_options['MATURITY'] = vstoxx_options['EXP_MONTH'].apply(tf)
#vstoxx_options.head()
vstoxx_options['STRIKE'] = vstoxx_options['STRIKE'] / 100.0
save = False
if save is True:
    import warnings
    warnings.simplefilter('ignore')
    h5 = pd.HDFStore('./vstoxx_march_2014.h5', complevel=9, complib='blosc')
    h5['vstoxx_index'] = vstoxx_index
    h5['vstoxx_futures'] = vstoxx_futures
    h5['vstoxx_options'] = vstoxx_options
    h5.close()
pricing_date = dt.datetime(2014, 3, 31)
# last trading day in March 2014
maturity = third_fridays[10]
# October maturity
initial_value = vstoxx_index['V2TX'][pricing_date]
# VSTOXX on pricing_date
forward = vstoxx_futures[(vstoxx_futures.DATE == pricing_date) & (vstoxx_futures.MATURITY == maturity)]['PRICE'].values[0]
tol = 0.20
option_selection = vstoxx_options[(vstoxx_options.DATE == pricing_date)
                                  & (vstoxx_options.MATURITY == maturity)
                                  & (vstoxx_options.TYPE == 'C')
                                  & (vstoxx_options.STRIKE > (1 - tol) * forward)
                                  & (vstoxx_options.STRIKE < (1 + tol) * forward)]
me_vstoxx = market_environment('me_vstoxx', pricing_date)
me_vstoxx.add_constant('initial_value', initial_value)
me_vstoxx.add_constant('final_date', maturity)
me_vstoxx.add_constant('currency', 'EUR')
me_vstoxx.add_constant('frequency', 'B')
me_vstoxx.add_constant('paths', 10000)
csr = constant_short_rate('csr', 0.01)
# somewhat arbitrarily chosen here
me_vstoxx.add_curve('discount_curve', csr)
# parameters to be calibrated later
me_vstoxx.add_constant('kappa', 1.0)
me_vstoxx.add_constant('theta', 1.2 * initial_value)
vol_est = vstoxx_index['V2TX'].std() * np.sqrt(len(vstoxx_index['V2TX']) / 252.0)
me_vstoxx.add_constant('volatility', vol_est)
# vol_est
vstoxx_model = square_root_diffusion('vstoxx_model', me_vstoxx)
me_vstoxx.add_constant('strike', forward)
me_vstoxx.add_constant('maturity', maturity)
payoff_func = 'np.maximum(maturity_value - strike, 0)'
vstoxx_eur_call = valuation_mcs_european('vstoxx_eur_call',vstoxx_model, me_vstoxx, payoff_func)
option_models = {}
for option in option_selection.index:
    strike = option_selection['STRIKE'].ix[option]
    me_vstoxx.add_constant('strike', strike)
    option_models[option] = valuation_mcs_european('eur_call_%d' % strike, vstoxx_model, me_vstoxx, payoff_func)
def calculate_model_values(p0):
    '''
    Returns all relevant option values.

    Parameters
    p0 : tuple/list, tuple of kappa, theta, volatility

    Returns
    model_values : dict, dictionary with model values
    '''
    kappa, theta, volatility = p0
    vstoxx_model.update(kappa=kappa,
                        theta=theta,
                        volatility=volatility)
    model_values = {}
    for option in option_models:
        model_values[option] = option_models[option].present_value(fixed_seed=True)
    return model_values
# calculate_model_values((0.5, 27.5, vol_est))
i = 0

def mean_squared_error(p0):
    '''
    Returns the mean-squared error given the model and market values.

    Parameters
    p0 : tuple/list, tuple of kappa, theta, volatility

    Returns
    MSE : float, mean-squared error
    '''
    global i
    model_values = np.array(calculate_model_values(p0).values())
    market_values = option_selection['PRICE'].values
    option_diffs = model_values - market_values
    # vectorized MSE calculation
    MSE = np.sum(option_diffs ** 2) / len(option_diffs)
    if i % 20 == 0:
        if i == 0:
            print('%4s' % i, '%6s' % "kappa", '%6s' % "theta", '%6s ->' % "vola", '%6s' % "MSE")
        print('%4d' % i, '%6.3f' % p0[0], '%6.3f' % p0[1], '%6.3f ->' % p0[2], '%6.3f' % MSE)
    i += 1
    return MSE
mean_squared_error((0.5, 27.5, vol_est))
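For what it's worth, in Python 3 dict.values() returns a view object rather than a list, so np.array(calculate_model_values(p0).values()) produces a zero-dimensional object array and the subtraction in mean_squared_error fails with exactly this TypeError. A minimal sketch of the usual fix is to materialise the view first:
# Python 3: convert the dict view to a list before handing it to NumPy
model_values = np.array(list(calculate_model_values(p0).values()))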
