In [2]:
import time
import warnings
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.stats import norm
from urllib.request import urlopen
from scipy.optimize import minimize, basinhopping, NonlinearConstraint
from sklearn.neighbors import KernelDensity
# warnings.filterwarnings('ignore')
In [3]:
# Candidate parametric forms for implied vol as a function of log-moneyness x = log(K)
def sigma_func_linear(x, a, b):
    return a * x + b

def sigma_func_quad(x, a, b, c):
    return a * x**2 + b * x + c

def sigma_func_quad_diff(x, a, b, c, a2):
    # quadratic with a different curvature (a2) on the downside (x < 0)
    return a * x**2 * (x >= 0) + a2 * x**2 * (x < 0) + b * x + c

def calc_call_premium(K, T, sigma):
    # Undiscounted Black-Scholes call with spot normalized to 1 and r = 0,
    # so K is moneyness and the premium is expressed as a fraction of spot
    sigma = np.where(sigma > 0, sigma, 1e-4)  # floor non-positive vols to keep d1/d2 finite
    d1 = (np.log(1/K) + sigma**2 / 2 * T) / (sigma * np.sqrt(T))
    d2 = (np.log(1/K) - sigma**2 / 2 * T) / (sigma * np.sqrt(T))
    c = norm.cdf(d1) - K * norm.cdf(d2)
    return c
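As a quick sanity check (not part of the original notebook), the at-the-money price from calc_call_premium should be close to the standard approximation 0.3989·σ√T:

In [ ]:
# Hypothetical check: ATM call (K = 1) with sigma = 0.2, T = 1
print(calc_call_premium(K=1.0, T=1.0, sigma=0.2))   # ~0.0797
print(0.3989 * 0.2 * np.sqrt(1.0))                  # ~0.0798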
In [4]:
def calc_density(x, r, T, params, delta_x=0.001, method='linear'):
    """
    Approximate the density of x = log(K) using butterfly
    :param x: float / np.array(dtype=float), log return
    :param r: float
    :param params: array-like object, 
        linear: a, b
        quad: a, b, c
        quad_diff: a, b, c, a2
        svi: a, b, m, rho, sigma
    :delta_x: float, default 0.001
    :method: str, the analytical form of volatility function, can be 'linear'/'quad'/'quad_diff'/'svi'
    
    :return: float / np.array(dtype=float), a series of density
    """
    # params: a, b, c, a2
    if method == 'linear' and len(params) == 2:
        x1, x2, a, b = x + np.log(1 + delta_x), x + np.log(1 - delta_x), params[0], params[1]
        sigma1, sigma, sigma2 = a * x1 + b,\
                                a * x + b,\
                                a * x2 + b
    elif method == 'quad' and len(params) == 3:
        x1, x2, a, b, c = x + np.log(1 + delta_x), x + np.log(1 - delta_x), params[0], params[1], params[2]
        sigma1, sigma, sigma2 = a * x1**2 + b * x1 + c,\
                                a * x**2 + b * x + c,\
                                a * x2**2 + b * x2 + c
    elif method == 'quad_diff' and len(params) == 4:
        x1, x2, a, b, c, a2 = x + np.log(1 + delta_x), x + np.log(1 - delta_x), params[0], params[1], params[2], params[3]
        sigma1, sigma, sigma2 = a * x1**2 * (x1 >= 0) + a2 * x1**2 * (x1 < 0) + b * x1 + c,\
                                a * x**2 * (x >= 0) + a2 * x**2 * (x < 0) + b * x + c,\
                                a * x2**2 * (x2 >= 0) + a2 * x2**2 * (x2 < 0) + b * x2 + c
    elif method == 'svi' and len(params) == 5:
        x1, x2 = x + np.log(1 + delta_x), x + np.log(1 - delta_x)
        sigma1, sigma, sigma2 = SVIvol(params, x1), SVIvol(params, x), SVIvol(params, x2)
    else:
        raise Exception("Wrong param dimension or fit method!")

    f = np.exp(r * T) * (calc_call_premium(np.exp(x1), T, sigma1)
                         - 2 * calc_call_premium(np.exp(x), T, sigma)
                         + calc_call_premium(np.exp(x2), T, sigma2)) / (np.exp(x) * delta_x**2)
    return f

def obj_func_least_square(params, r_series, r_density, term_day, method):
    """
    Mean squared error between the model-implied density and the target (empirical) density
    :param params: parameters of the volatility function
    :param r_series: array-like, log-moneyness points at which the density is evaluated
    :param r_density: array-like, target (empirical) density at r_series
    :param term_day: int, term in business days (T = term_day / 260)
    :param method: str, the analytical form of the volatility function
    """
    density = calc_density(r_series, r=0, T=term_day/260, params=params, delta_x=0.001, method=method)
    return np.mean((r_density - density)**2)
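The 'svi' branch of calc_density calls SVIvol, which is not defined in this notebook. A minimal sketch, assuming the same raw-SVI expression that delta_skewness uses below (the square root of a + b(ρ(x − m) + √((x − m)² + σ²)) is taken directly as the volatility):

In [ ]:
def SVIvol(params, x):
    # Hypothetical helper assumed by calc_density's 'svi' branch;
    # mirrors the raw-SVI form used in delta_skewness below
    a, b, m, rho, sigma = params
    return np.sqrt(a + b * (rho * (x - m) + np.sqrt((x - m)**2 + sigma**2)))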
In [5]:
raw_df = pd.read_csv('raw_df.csv', index_col=0)
raw_df.index = pd.to_datetime(raw_df.index)
df = raw_df.copy()
df = df.reindex(pd.date_range(start="1970-01-02", end="2018-12-31", freq='B'))
df['log_SPX'] = np.log(df["SPX"])
df['r'] = df['log_SPX'].diff()
term_days = [5, 10] + list(range(20, 521, 20))
term_name = [str(i) + "D" for i in term_days]
terms = dict(zip(term_name, term_days))
for k, v in terms.items():
    df[k + '_ret'] = df["r"].rolling(v).sum()
    # demean so that the average gross return exp(ret) over the sample equals 1 for every term
    df[k + '_ret'] = df[k + '_ret'] - np.log(np.exp(df[k + '_ret']).mean())
In [6]:
df
Out[6]:
SPX log_SPX r 5D_ret 10D_ret 20D_ret 40D_ret 60D_ret 80D_ret 100D_ret ... 340D_ret 360D_ret 380D_ret 400D_ret 420D_ret 440D_ret 460D_ret 480D_ret 500D_ret 520D_ret
1970-01-02 93.000000 4.532599 NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1970-01-05 93.459999 4.537534 0.004934 NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1970-01-06 92.820000 4.530662 -0.006871 NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1970-01-07 92.629997 4.528613 -0.002049 NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1970-01-08 92.680000 4.529153 0.000540 NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1970-01-09 92.400002 4.526127 -0.003026 -0.008015 NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1970-01-12 91.699997 4.518522 -0.007605 -0.020554 NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1970-01-13 91.919998 4.520919 0.002396 -0.011286 NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1970-01-14 91.650002 4.517977 -0.002942 -0.012178 NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1970-01-15 91.680000 4.518304 0.000327 -0.012391 NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1970-01-16 90.919998 4.509980 -0.008324 -0.017689 -0.025674 NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1970-01-19 89.650002 4.495913 -0.014067 -0.024151 -0.044675 NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1970-01-20 89.830002 4.497919 0.002006 -0.024542 -0.035798 NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1970-01-21 89.949997 4.499254 0.001335 -0.020265 -0.032414 NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1970-01-22 90.040001 4.500254 0.001000 -0.019593 -0.031954 NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1970-01-23 89.370003 4.492785 -0.007469 -0.018737 -0.036397 NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1970-01-26 88.169998 4.479267 -0.013518 -0.018189 -0.042311 NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1970-01-27 87.620003 4.473009 -0.006257 -0.026452 -0.050964 NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1970-01-28 86.790001 4.463491 -0.009518 -0.037305 -0.057541 NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1970-01-29 85.690002 4.450736 -0.012755 -0.051060 -0.070623 NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1970-01-30 85.019997 4.442886 -0.007850 -0.051441 -0.070148 -0.095886 NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1970-02-02 85.750000 4.451436 0.008550 -0.029373 -0.047532 -0.092270 NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1970-02-03 86.769997 4.463261 0.011825 -0.011291 -0.037713 -0.073574 NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1970-02-04 86.239998 4.457134 -0.006127 -0.007900 -0.045175 -0.077652 NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1970-02-05 85.900002 4.453184 -0.003950 0.000905 -0.050125 -0.082142 NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1970-02-06 86.330002 4.458177 0.004993 0.013748 -0.037663 -0.074123 NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1970-02-09 87.010002 4.466023 0.007846 0.013045 -0.016299 -0.058672 NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1970-02-10 86.099998 4.455509 -0.010514 -0.009294 -0.020555 -0.071582 NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1970-02-11 86.940002 4.465218 0.009709 0.006542 -0.001328 -0.058931 NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1970-02-12 86.730003 4.462800 -0.002418 0.008074 0.009009 -0.061677 NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
2018-11-20 2641.889893 7.879250 -0.018318 -0.031481 -0.045141 -0.042888 -0.111059 -0.111141 -0.089132 -0.058506 ... -0.045388 -0.032288 -0.038697 -0.031559 -0.019769 -0.030994 -0.025327 -0.001304 -0.008810 0.015471
2018-11-21 2649.929932 7.882288 0.003039 -0.020846 -0.063090 -0.008498 -0.104726 -0.113787 -0.085053 -0.055468 ... -0.042841 -0.030702 -0.037225 -0.029651 -0.012963 -0.036295 -0.027268 -0.000028 -0.003311 0.017702
2018-11-22 2649.929932 7.882288 0.000000 -0.031384 -0.060578 -0.026952 -0.107485 -0.109347 -0.089967 -0.064051 ... -0.040655 -0.021289 -0.037493 -0.027485 -0.006125 -0.034667 -0.026403 0.003588 -0.001446 0.017702
2018-11-23 2632.560059 7.875712 -0.006576 -0.040181 -0.057912 -0.016049 -0.114055 -0.116058 -0.101177 -0.079073 ... -0.049119 -0.034248 -0.043239 -0.032582 -0.012702 -0.039928 -0.034657 -0.006349 -0.009273 0.007219
2018-11-26 2673.449951 7.891125 0.015413 -0.007985 -0.022601 0.005945 -0.102277 -0.100645 -0.089296 -0.072445 ... -0.035352 -0.019762 -0.026846 -0.021934 -0.005865 -0.022503 -0.019244 0.011758 0.006140 0.027900
2018-11-27 2682.169922 7.894381 0.003256 0.013589 -0.017862 -0.006344 -0.098623 -0.095733 -0.088860 -0.072656 ... -0.029678 -0.015723 -0.028091 -0.017991 0.000299 -0.006761 -0.022017 0.008471 0.007150 0.029822
2018-11-28 2743.790039 7.917095 0.022714 0.033265 0.012448 0.005577 -0.076621 -0.070212 -0.065883 -0.042822 ... -0.006601 -0.000288 -0.004381 0.023069 0.024731 0.014065 0.001779 0.023191 0.038256 0.055193
2018-11-29 2737.800049 7.914910 -0.002185 0.031079 -0.000275 -0.007111 -0.070603 -0.068738 -0.066626 -0.053719 ... 0.005794 -0.004346 -0.004324 0.017203 0.015016 0.012940 -0.000825 0.021741 0.036364 0.056530
2018-11-30 2760.169922 7.923048 0.008138 0.045793 0.005642 0.007363 -0.056922 -0.058385 -0.051349 -0.046660 ... 0.012657 -0.000871 0.003529 0.018596 0.026193 0.021922 0.005820 0.030745 0.049149 0.064270
2018-12-03 2790.370117 7.933930 0.010882 0.041262 0.033307 0.012661 -0.045645 -0.049399 -0.036453 -0.034749 ... 0.013545 0.010063 0.006099 0.024331 0.026294 0.033824 0.015685 0.047655 0.060031 0.069348
2018-12-04 2700.060059 7.901029 -0.032900 0.005106 0.018725 -0.026479 -0.077126 -0.086032 -0.075722 -0.071615 ... -0.018856 -0.023434 -0.020082 -0.010405 -0.012679 -0.006301 -0.014634 0.015645 0.018680 0.033043
2018-12-05 2700.060059 7.901029 0.000000 -0.017609 0.015686 -0.047467 -0.043710 -0.086389 -0.068091 -0.073773 ... -0.020275 -0.028793 -0.019499 -0.012891 -0.012193 -0.007386 -0.028215 0.015347 0.012974 0.019965
2018-12-06 2695.949951 7.899506 -0.001523 -0.016946 0.014162 -0.046478 -0.024446 -0.093180 -0.077503 -0.071336 ... -0.006241 -0.030162 -0.020567 -0.018847 -0.014269 -0.011840 -0.023861 0.013253 0.012222 0.016285
2018-12-07 2633.080078 7.875910 -0.023596 -0.048680 -0.002857 -0.060833 -0.062148 -0.117052 -0.104417 -0.093983 ... -0.028000 -0.053391 -0.045723 -0.042754 -0.035950 -0.033179 -0.047961 -0.017582 -0.014885 -0.013233
2018-12-10 2637.719971 7.877670 0.001761 -0.057802 -0.016510 -0.039174 -0.054465 -0.109706 -0.105081 -0.094059 ... -0.027402 -0.050566 -0.044278 -0.040993 -0.035921 -0.029775 -0.042918 -0.013703 -0.009570 -0.010334
2018-12-11 2636.780029 7.877314 -0.000356 -0.025258 -0.020123 -0.038047 -0.076089 -0.115417 -0.107504 -0.099185 ... -0.037650 -0.053841 -0.036529 -0.040144 -0.037465 -0.030691 -0.040357 -0.014287 -0.009926 -0.017209
2018-12-12 2651.070068 7.882719 0.005405 -0.019853 -0.037432 -0.025046 -0.070432 -0.111265 -0.101701 -0.102840 ... -0.028785 -0.048719 -0.039893 -0.034279 -0.030788 -0.022226 -0.032665 -0.009575 -0.007347 -0.003654
2018-12-13 2650.540039 7.882519 -0.000200 -0.018530 -0.035446 -0.035784 -0.056135 -0.119275 -0.100208 -0.100003 ... -0.026909 -0.047946 -0.031456 -0.042022 -0.031570 -0.024354 -0.033665 -0.015511 -0.005400 -0.007730
2018-12-14 2599.949951 7.863247 -0.019271 -0.014204 -0.062855 -0.057276 -0.075045 -0.138178 -0.125659 -0.112691 ... -0.047851 -0.065875 -0.052259 -0.064994 -0.054922 -0.042798 -0.056199 -0.038342 -0.026519 -0.025249
2018-12-17 2545.939941 7.842255 -0.020992 -0.036957 -0.094729 -0.061485 -0.091729 -0.155648 -0.154293 -0.127912 ... -0.069331 -0.086139 -0.075560 -0.084768 -0.075951 -0.064477 -0.077558 -0.064566 -0.047511 -0.048214
2018-12-18 2546.159912 7.842342 0.000086 -0.036515 -0.061743 -0.043081 -0.086115 -0.154256 -0.154475 -0.132699 ... -0.070087 -0.088499 -0.075474 -0.081899 -0.074839 -0.062956 -0.074087 -0.068479 -0.044453 -0.051759
2018-12-19 2506.959961 7.826826 -0.015515 -0.057435 -0.077258 -0.061635 -0.070280 -0.166476 -0.175676 -0.147173 ... -0.090207 -0.104507 -0.092441 -0.098981 -0.091485 -0.074705 -0.097942 -0.088974 -0.061731 -0.064814
2018-12-20 2467.419922 7.810928 -0.015898 -0.073133 -0.091633 -0.077533 -0.104632 -0.185134 -0.187134 -0.167986 ... -0.111809 -0.118218 -0.098926 -0.115146 -0.105217 -0.083764 -0.112212 -0.104008 -0.074013 -0.078847
2018-12-21 2416.620117 7.790125 -0.020803 -0.074665 -0.088839 -0.091760 -0.107956 -0.205930 -0.208071 -0.193422 ... -0.134593 -0.140909 -0.126112 -0.135119 -0.124541 -0.104567 -0.131700 -0.126488 -0.098176 -0.100901
2018-12-24 2351.100098 7.762639 -0.027487 -0.081159 -0.118087 -0.134659 -0.128861 -0.237051 -0.235558 -0.224441 ... -0.162079 -0.170041 -0.154526 -0.161626 -0.156793 -0.140630 -0.157175 -0.153975 -0.122969 -0.128387
2018-12-25 2351.100098 7.762639 0.000000 -0.081245 -0.117730 -0.137916 -0.144406 -0.236654 -0.233902 -0.227261 ... -0.154500 -0.167624 -0.153743 -0.166127 -0.156106 -0.137723 -0.144689 -0.160004 -0.129512 -0.130633
2018-12-26 2467.699951 7.811042 0.048403 -0.017327 -0.074732 -0.112226 -0.106796 -0.188962 -0.182692 -0.178595 ... -0.109221 -0.118857 -0.112619 -0.116728 -0.089357 -0.087602 -0.098174 -0.110518 -0.089103 -0.073838
2018-12-27 2488.830078 7.819568 0.008526 0.007097 -0.066006 -0.101515 -0.108772 -0.172233 -0.170507 -0.168626 ... -0.100516 -0.095751 -0.105965 -0.105960 -0.084511 -0.086604 -0.088587 -0.102411 -0.079841 -0.065019
2018-12-28 2485.739990 7.818326 -0.001242 0.026658 -0.047977 -0.110895 -0.103678 -0.167932 -0.169533 -0.162729 ... -0.100268 -0.098268 -0.111870 -0.107486 -0.092498 -0.084807 -0.088985 -0.105146 -0.080217 -0.061613
2018-12-31 2506.850098 7.826782 0.008457 0.062601 -0.018528 -0.113320 -0.100806 -0.159080 -0.162973 -0.150259 ... -0.102593 -0.099805 -0.103361 -0.107342 -0.089188 -0.087132 -0.079508 -0.097707 -0.065733 -0.053157

12782 rows × 31 columns

In [7]:
df.mean()
Out[7]:
SPX         776.934755
log_SPX       6.116116
r             0.000258
5D_ret       -0.000260
10D_ret      -0.000486
20D_ret      -0.000942
40D_ret      -0.001849
60D_ret      -0.002725
80D_ret      -0.003611
100D_ret     -0.004556
120D_ret     -0.005577
140D_ret     -0.006578
160D_ret     -0.007482
180D_ret     -0.008373
200D_ret     -0.009346
220D_ret     -0.010253
240D_ret     -0.011175
260D_ret     -0.012126
280D_ret     -0.013103
300D_ret     -0.014102
320D_ret     -0.015031
340D_ret     -0.015950
360D_ret     -0.016895
380D_ret     -0.017846
400D_ret     -0.018800
420D_ret     -0.019776
440D_ret     -0.020718
460D_ret     -0.021566
480D_ret     -0.022396
500D_ret     -0.023174
520D_ret     -0.023886
dtype: float64
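The rolling-return columns have slightly negative means: the demeaning above forces the mean of exp(ret) to equal 1, so by Jensen's inequality the mean of ret itself is below zero, and the gap grows with the term. A quick illustrative check:

In [ ]:
# after demeaning, the mean gross return should equal 1 for every term
np.exp(df['20D_ret']).mean(), np.exp(df['520D_ret']).mean()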

Explanation of resampling: fit a Gaussian kernel density (Silverman's rule-of-thumb bandwidth) to the demeaned 20D returns; this KDE serves as the target density to which the volatility parameterizations are fitted.

In [8]:
full_series = df['20D_ret'].dropna()
avg, std = full_series.mean(), full_series.std()
bandwidth = round(1.06 * std * (len(full_series))**(-0.2), 4)
kde = KernelDensity(bandwidth=bandwidth)
kde.fit(full_series.to_frame())
r_series = full_series.sort_values().values
r_density = np.exp(kde.score_samples(r_series.reshape(-1, 1)))
In [9]:
plt.figure(figsize=(12, 10))
full_series.hist(bins=300, color='grey', density=True, grid=False)
plt.title("probability density of log-moneyness", fontsize=20)
plt.ylabel("value of pdf", fontsize=15)
plt.xlabel("x: log-moneyness", fontsize=15)
plt.plot(r_series, r_density, color='salmon', linewidth=4)
plt.legend(["kernel density", "empirical density"]);
plt.savefig('density.png')

Resample

In [24]:
def single_ols_resample(df, term_day, sample_size, max_trial, equidist=True, verbose=0):
    """
    Given a term_day, using different samples to solve for the optimal parameters, return all the results.
    
    :param df: pd.DataFrame, return of SPX
    :param term_day: int
    :param sample_size: int, how many points will each sample contain
    :param max_trial: how many different samples will be generated
    :param equidist: bool, whether a sample contains equidistant points
    :param verbose: 0, 1, or 2
        if verbose = 0, do not print any results of the optimizer
        if verbose = 1, print optimizer.success
        if verbose = 2, print the whole optimizer object
    """
    col_names = ['linear_a', 'linear_b', 'quad_a', 'quad_b', 'quad_c', 'quad_diff_a1', 'quad_diff_b',
                 'quad_diff_c', 'quad_diff_a2']
    result = pd.DataFrame()
    full_series = df[str(term_day)+'D_ret'].dropna()
    avg, std = full_series.mean(), full_series.std()
    bandwidth = round(1.06 * std * (len(full_series))**(-0.2), 4)
    kde = KernelDensity(bandwidth=bandwidth)
    kde.fit(full_series.to_frame())
    for n in range(max_trial):
        if equidist:
            # random half-width so each sample grid covers a different range of moneyness
            rand = np.random.uniform(0.5, 1.2)
            r_series = np.linspace(avg - 5*rand*std, avg + 5*rand*std, sample_size)
        else:
            r_series = np.random.choice(full_series, sample_size)
        r_density = np.exp(kde.score_samples(r_series.reshape(-1, 1)))
        linear = minimize(obj_func_least_square, (-1, 1), args=(r_series, r_density, term_day, 'linear'),
                               method="BFGS")
        quad = minimize(obj_func_least_square, (1, -1, 1), args=(r_series, r_density, term_day, 'quad'),
                             method="BFGS")
        quad_diff = minimize(obj_func_least_square, (1, -1, 1, 1), args=(r_series, r_density, term_day, 'quad_diff'),
                                  method="BFGS")
        if verbose == 1:
            print("Trial " + str(n) + ":", linear.success, quad.success, quad_diff.success)
        elif verbose == 2:
            print('Trial ' + str(n) + ":")
            print(linear)
            print(quad)
            print(quad_diff)
        tmp = pd.DataFrame([np.hstack([linear.x, quad.x, quad_diff.x])], columns=col_names)
        result = pd.concat([result, tmp], axis=0, ignore_index=True)
    return result

def full_ols_resample(df, terms, sample_size, max_trial, equidist=True, save_to_file=False):
    """
    Resample and compute the avg, std for each term_day
    
    :param df: pd.DataFrame, period return of SPX
    :param terms: dict, {term_str: term_day}
    :param sample_size: int, how many points each sample contains
    :param max_trial: int, how many different samples will be generated
    :param equidist: bool, whether a sample contains equidistant points
    :param save_to_file: bool, whether each term's result will be saved to CSV
    """
    col_names = ['linear_a', 'linear_b', 'quad_a', 'quad_b', 'quad_c', 'quad_diff_a1', 'quad_diff_b',
             'quad_diff_c', 'quad_diff_a2']
    params_avg = []
    params_std = []
    num_window = len(terms)
    for term_str, term_day in terms.items():
        result = single_ols_resample(df, term_day=term_day, sample_size=sample_size,
                                     max_trial=max_trial, equidist=equidist)
        if save_to_file:
            result.to_csv("OLS_opt_params/demean_100_40/"+term_str+"_params.csv")
        tmp_params = result.mean().values.tolist()
        params_avg.append(tmp_params)
        params_std.append(result.std().values.tolist())
    avg_result = pd.DataFrame(params_avg)
    std_result = pd.DataFrame(params_std)
    avg_result.columns = std_result.columns = col_names
    avg_result.index = std_result.index = terms.keys()
    return avg_result, std_result
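For a quick convergence check on a single term before running the full loop, a small illustrative call (hypothetical parameter choices) prints optimizer.success for each fit:

In [ ]:
# Illustrative: 2 resamples for the 20-day term with convergence diagnostics
_ = single_ols_resample(df, term_day=20, sample_size=100, max_trial=2, verbose=1)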
In [28]:
# Load previously saved resampling results (alternative to re-running full_ols_resample in the next cell)
col_names = ['linear_a', 'linear_b', 'quad_a', 'quad_b', 'quad_c', 'quad_diff_a1', 'quad_diff_b',
             'quad_diff_c', 'quad_diff_a2']
params_avg = []
params_std = []
num_window = len(terms)
for term_str, term_day in terms.items():
    result = pd.read_csv("OLS_opt_params/demean_100_40/"+term_str+"_params.csv", index_col=0)
    tmp_params = result.mean().values.tolist()
    params_avg.append(tmp_params)
    params_std.append(result.std().values.tolist())
avg_result = pd.DataFrame(params_avg)
std_result = pd.DataFrame(params_std)
avg_result.columns = std_result.columns = col_names
avg_result.index = std_result.index = terms.keys()
ols_params, ols_param_std = avg_result, std_result
In [29]:
ols_params, ols_param_std = full_ols_resample(df, terms, 200, 40, save_to_file=True)
In [30]:
ols_params
Out[30]:
linear_a linear_b quad_a quad_b quad_c quad_diff_a1 quad_diff_b quad_diff_c quad_diff_a2
5D -0.384535 0.136667 18.357948 -0.486458 0.148560 23.313440 -0.485874 0.148404 11.625857
10D -0.328274 0.131634 10.126362 -0.421119 0.143802 13.364030 -0.417745 0.144026 6.193987
20D -0.274665 0.129889 5.105070 -0.353222 0.141916 6.762189 -0.356501 0.143053 3.733576
40D -0.186048 0.131322 2.148193 -0.232261 0.141385 2.034333 -0.231684 0.141182 2.218641
60D -0.167499 0.128775 1.704699 -0.212144 0.139993 1.819725 -0.214182 0.140447 1.673031
80D -0.114726 0.131753 1.113662 -0.145581 0.142504 0.849122 -0.142282 0.141405 1.217819
100D -0.066531 0.129377 1.147224 -0.091344 0.142550 0.909084 -0.089555 0.141379 1.206420
120D -0.092863 0.131604 0.887098 -0.121999 0.143330 1.223384 -0.125796 0.144601 0.687786
140D -0.102951 0.135028 0.733849 -0.134305 0.146643 1.073056 -0.137415 0.148027 0.526000
160D -0.126436 0.135013 0.664510 -0.164540 0.147200 1.031451 -0.166864 0.148579 0.375144
180D -0.135776 0.134901 0.588364 -0.174456 0.146761 0.899146 -0.180614 0.149281 0.419516
200D -0.152392 0.135948 0.511006 -0.193658 0.147572 0.830031 -0.198652 0.149970 0.273192
220D -0.153019 0.135781 0.475884 -0.196222 0.147361 0.784316 -0.199955 0.149229 0.188909
240D -0.161788 0.136692 0.401365 -0.201609 0.147366 0.684865 -0.211360 0.150906 0.236263
260D -0.156875 0.137836 0.380137 -0.196558 0.148551 0.655203 -0.207599 0.152642 0.244443
280D -0.155252 0.138961 0.337448 -0.193139 0.149630 0.548037 -0.203233 0.153402 0.252054
300D -0.152753 0.139690 0.299178 -0.189042 0.149989 0.506595 -0.199929 0.153768 0.207064
320D -0.148598 0.141233 0.284766 -0.185530 0.152099 0.457045 -0.195198 0.155723 0.210674
340D -0.138922 0.142982 0.267263 -0.174570 0.154128 0.348882 -0.179804 0.156014 0.236719
360D -0.132271 0.145040 0.227591 -0.163293 0.155395 0.206711 -0.162048 0.154870 0.237331
380D -0.125922 0.145790 0.206281 -0.153809 0.155735 0.129518 -0.148524 0.153937 0.243308
400D -0.112667 0.145631 0.193281 -0.135724 0.155174 0.053721 -0.124764 0.151543 0.244114
420D -0.098050 0.144948 0.189965 -0.117254 0.154421 -0.018262 -0.100051 0.148923 0.251456
440D -0.087994 0.143286 0.210587 -0.107000 0.154136 0.244122 -0.087932 0.314548 0.407746
460D -0.080446 0.140410 0.221465 -0.097538 0.151637 0.842062 -0.080889 0.651629 0.655207
480D -0.073312 0.137673 0.244838 -0.090924 0.150140 0.593429 -0.074965 0.485309 0.564414
500D -0.072082 0.135769 0.496813 -0.075503 0.401931 0.854527 -0.077182 0.630993 0.632390
520D -0.069145 0.133018 0.552170 -0.073462 0.447206 0.832339 -0.076390 0.587514 0.598933
In [31]:
ols_param_std
Out[31]:
linear_a linear_b quad_a quad_b quad_c quad_diff_a1 quad_diff_b quad_diff_c quad_diff_a2
5D 0.000547 0.000013 2.525069 0.020596 0.001813 2.560304 0.006274 0.001723 2.585604
10D 0.000708 0.000023 1.591324 0.021143 0.002142 1.577151 0.006430 0.001713 0.996706
20D 0.000886 0.000043 0.782928 0.016410 0.002041 0.746903 0.007837 0.001712 0.562025
40D 0.000534 0.000038 0.233136 0.006660 0.001248 0.169683 0.007091 0.001217 0.304292
60D 0.000375 0.000030 0.241548 0.008645 0.001822 0.125108 0.007064 0.001387 0.279643
80D 0.000528 0.000047 0.085127 0.002797 0.000897 0.066563 0.003767 0.001032 0.122600
100D 0.000423 0.000046 0.112058 0.002520 0.001399 0.068611 0.002968 0.001169 0.129621
120D 0.000265 0.000030 0.132491 0.005567 0.001952 0.173404 0.001806 0.001980 0.124604
140D 0.000274 0.000035 0.112762 0.006224 0.002040 0.154455 0.002155 0.002128 0.079759
160D 0.000331 0.000051 0.099204 0.007471 0.002088 0.129020 0.002510 0.002095 0.046984
180D 0.000361 0.000058 0.117645 0.010129 0.002725 0.137487 0.005975 0.002881 0.070581
200D 0.000371 0.000069 0.116291 0.011982 0.003019 0.117342 0.006470 0.003011 0.066915
220D 0.000379 0.000075 0.116931 0.013343 0.003284 0.115125 0.006461 0.003206 0.060552
240D 0.000271 0.000051 0.085257 0.010630 0.002617 0.083824 0.007667 0.002895 0.065966
260D 0.000267 0.000057 0.073518 0.009384 0.002377 0.079031 0.007211 0.002781 0.054917
280D 0.000425 0.000099 0.076380 0.010478 0.002751 0.068688 0.008149 0.002869 0.062093
300D 0.000298 0.000073 0.070946 0.010590 0.002802 0.059216 0.008402 0.002920 0.067505
320D 0.000175 0.000045 0.061077 0.009643 0.002640 0.049982 0.008027 0.002779 0.059920
340D 0.000228 0.000064 0.062583 0.010195 0.003012 0.044941 0.008593 0.002751 0.066449
360D 0.000292 0.000083 0.063855 0.010484 0.003344 0.041042 0.009188 0.002780 0.073945
380D 0.000343 0.000100 0.045839 0.007422 0.002517 0.028339 0.007069 0.002106 0.060303
400D 0.000293 0.000083 0.039497 0.005802 0.002253 0.021889 0.005307 0.001591 0.053873
420D 0.000490 0.000146 0.037525 0.004818 0.002193 0.019274 0.004606 0.001432 0.055760
440D 0.000462 0.000137 0.045526 0.005366 0.002752 0.663865 0.013298 0.373970 0.272213
460D 0.000153 0.000044 0.037881 0.004062 0.002276 0.905697 0.009082 0.518188 0.380306
480D 0.000637 0.000195 0.051092 0.004915 0.002990 0.890167 0.007322 0.503938 0.396762
500D 0.000659 0.000215 0.461733 0.023604 0.423231 0.865488 0.009425 0.503689 0.401395
520D 0.000605 0.000204 0.545052 0.035173 0.474007 0.869002 0.008415 0.510723 0.446089
In [32]:
ols_params.to_csv("ols_resample_params_shift.csv")
ols_param_std.to_csv("ols_resample_std_shift.csv")
In [16]:
ols_params = pd.read_csv("ols_resample_params_shift.csv", index_col=0)
ols_param_std = pd.read_csv("ols_resample_std_shift.csv", index_col=0)
In [14]:
def delta_skewness(params, T, method='linear'):
    """
    Slope of the fitted vol curve between the 20-delta and 80-delta strikes.
    With S = 1 and r = 0, the log-moneyness of a strike with Black-Scholes delta p is
        x = (sigma_atm * sqrt(T) / 2 - Phi^{-1}(p)) * sigma_atm * sqrt(T),
    evaluated here at p = 0.2 and p = 0.8; the skew is (vol(x1) - vol(x2)) / (x1 - x2).
    """
    d1, d2 = norm.ppf(0.2), norm.ppf(0.8)
    if method == 'linear':
        sigma_atm = params[1]
        x1, x2 = [(sigma_atm * np.sqrt(T) / 2 - d) * sigma_atm * np.sqrt(T) for d in [d1, d2]]
        y1, y2 = [params[0] * x + params[1] for x in [x1, x2]]
    elif method == 'quad':
        sigma_atm = params[2]
        x1, x2 = [(sigma_atm * np.sqrt(T) / 2 - d) * sigma_atm * np.sqrt(T) for d in [d1, d2]]
        y1, y2 = [params[0] * x**2 + params[1] * x + params[2] for x in [x1, x2]]
    elif method == 'quad_diff':
        sigma_atm = params[2]
        x1, x2 = [(sigma_atm * np.sqrt(T) / 2 - d) * sigma_atm * np.sqrt(T) for d in [d1, d2]]
        y1, y2 = [(params[0] * (x >= 0) + params[3] * (x < 0)) * x**2 + params[1] * x + params[2] for x in [x1, x2]]
    elif method == 'svi':
        a, b, m, rho, sigma = params
        sigma_atm = np.sqrt(a + b * (np.sqrt(m**2 + sigma**2) - rho * m))
        x1, x2 = [(sigma_atm * np.sqrt(T) / 2 - d) * sigma_atm * np.sqrt(T) for d in [d1, d2]]
        y1, y2 = [np.sqrt(a + b * (rho * (x - m) + np.sqrt((x - m)**2 + sigma**2))) for x in [x1, x2]]
    return (y1 - y2) / (x1 - x2)
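For example (illustrative, assuming ols_params has been loaded), the 20/80-delta skew implied by the 20D linear fit is simply the fitted slope a:

In [ ]:
# Illustrative: skew of the 20D linear fit at T = 20/260 (for a linear vol curve this equals linear_a)
delta_skewness(ols_params.loc['20D', ['linear_a', 'linear_b']].values, T=20/260, method='linear')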
In [12]:
def plot_skew_term_structure(params_avg, params_std=None, legend=['whole_regime'], delta_skew=True, ylim=(-0.5, 0)):
    """
    :params_avg: list of pd.DataFrame, mean of optimal parameters (pd.DataFrame) for different regimes
        each DataFrame: index, ['5D', '10D', ...]; columns: ['linear_a', ..., 'quad_diff_a2']
        If a single DataFrame is passed, it is automatically wrapped in a list.
    :params_std: default None, list of pd.DataFrame, standard deviation of optimal parameters for different regimes
        each DataFrame: index, ['5D', '10D', ...]; columns: ['linear_a', ..., 'quad_diff_a2']
        If a single DataFrame is passed, it is automatically wrapped in a list.
    :param legend: input of plt.legend(), for example ['high_vol regime', 'mid_vol regime', 'low_vol regime']
    :param delta_skew: bool, how to compute skewness
        If True, use the slope of the fitted vol curve between the 20-delta and 80-delta strikes
                (log-moneyness x = (sigma_atm*sqrt(T)/2 - \Phi^{-1}(p)) * sigma_atm*sqrt(T) for p = 0.2 and 0.8),
                i.e. delta_vol / delta_ln(K) between those two strikes
        If False, simply plot the slope coefficient of the vol curve (its first derivative at the money)
    """
    if not isinstance(params_avg, list):
        params_avg = [params_avg]
    if params_std is not None and not isinstance(params_std, list):
        params_std = [params_std]
    x_axis = [int(str(i)[:-1]) for i in params_avg[0].index.tolist()]
    T = np.reshape(np.array(x_axis), (1, -1)) / 260
    colors = ['#C75B56', '#6C8DBF', '#60854B']
    cm = {'g':'#60854B', 'r':'#C75B56', 'b':'#6C8DBF'}

    plt.figure(figsize=(18, 5))
    plt.subplot(1, 3, 1)
    for i in range(len(params_avg)):
        params = (params_avg[i].values[:, :2]).T
        if delta_skew:
            y = list(delta_skewness(params, T, method='linear')[0])
            plt.plot(x_axis, y, color=colors[0])
        else:
            plt.plot(x_axis, params[0], color=colors[0])
        if params_std is not None:
            tmp = (2 * params_std[i].values[:, :2]).T
            if delta_skew:
                y_lower = list(delta_skewness(params-tmp, T, method='linear')[0])
                y_upper = list(delta_skewness(params+tmp, T, method='linear')[0])
            else:
                y_lower = params[0] - tmp[0]
                y_upper = params[0] + tmp[0]
            # draw the 2-std band in both modes (matches the quad and quad_diff subplots below)
            plt.fill_between(x_axis, y_lower, y_upper, color=colors[0], alpha=0.2)
    plt.legend(legend, frameon=False)
    plt.title("term structure of skewness (linear fit)")
    plt.xlabel('term')
    plt.ylabel('skewness')
    plt.ylim(ylim)

    plt.subplot(1, 3, 2)
    for i in range(len(params_avg)):
        params = (params_avg[i].values[:, 2:5]).T
        if delta_skew:
            y = list(delta_skewness(params, T, method='quad')[0])
            plt.plot(x_axis, y, color=colors[1])
        else:
            plt.plot(x_axis, params[1], color=colors[1])
        if params_std is not None:
            tmp = (2 * params_std[i].values[:, 2:5]).T
            if delta_skew:
                y_lower = list(delta_skewness(params-tmp, T, method='quad')[0])
                y_upper = list(delta_skewness(params+tmp, T, method='quad')[0])
            else:
                y_lower = params[1] - tmp[1]
                y_upper = params[1] + tmp[1]
            plt.fill_between(x_axis, y_lower, y_upper, color=colors[1], alpha=0.2)
    plt.legend(legend, frameon=False)
    plt.title("term structure of skewness (quad fit)")
    plt.xlabel('term')
    plt.ylabel('skewness')
    plt.ylim(ylim)

    plt.subplot(1, 3, 3)
    for i in range(len(params_avg)):
        params = (params_avg[i].values[:, 5:9]).T
        if delta_skew:
            y = list(delta_skewness(params, T, method='quad_diff')[0])
            plt.plot(x_axis, y, color=colors[2])
        else:
            plt.plot(x_axis, params[1], color=colors[2])
        if params_std is not None:
            tmp = 2 * (params_std[i].values[:, 5:9]).T
            if delta_skew:
                y_lower = list(delta_skewness(params-tmp, T, method='quad_diff')[0])
                y_upper = list(delta_skewness(params+tmp, T, method='quad_diff')[0])
            else:
                y_lower = params[1] - tmp[1]
                y_upper = params[1] + tmp[1]
            plt.fill_between(x_axis, y_lower, y_upper, color=colors[2], alpha=0.2)
    plt.legend(legend, frameon=False)
    plt.title("term structure of skewness (quad_diff fit)")
    plt.xlabel('term')
    plt.ylabel('skewness')
    plt.ylim(ylim)

#     plt.subplot(4, 1, 4)
#     for i in range(len(params_avg)):
#         params = (params_avg[i].values[:, 9:14]).T
#         if delta_skew:
#             y = list(delta_skewness(params, T, method='svi')[0])
#             plt.plot(x_axis, y, color=colors[i])
#         else:
#             plt.plot(x_axis, params[1], color=colors[i])
#         if params_std is not None:
#             tmp = 2 * (params_std[i].values[:, 9:14]).T
#             if delta_skew:
#                 y_lower = list(delta_skewness(params-tmp, T, method='svi')[0])
#                 y_upper = list(delta_skewness(params+tmp, T, method='svi')[0])
#             else:
#                 y_lower = params[1] - tmp[1]
#                 y_upper = params[1] + tmp[1]
#             plt.fill_between(x_axis, y_lower, y_upper, color=colors[i], alpha=0.2)
#     plt.legend(legend, frameon=False)
#     plt.title("term structure of skewness (svi fit)")
#     plt.xlabel('term')
#     plt.ylabel('skewness')
#     plt.ylim(-0.9, -0.3)
In [ ]:

In [ ]:

Parts that should be put into the presentation notebook

In [ ]:

In [ ]:

3.1.2 Bootstrapping (Resampling)

The lower and upper bounds of each sample grid are chosen randomly, so the samples cover both central and extreme values (a minimal sketch of one draw follows the list):

  • $k$: uniformly distributed between 0.5 and 1.2
  • equidistant samples of $r$ from $[\mu - 5 k \sigma, \mu + 5 k \sigma]$
  • each sample contains 200 points
  • 40 different samples were used for fitting the curve
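A minimal sketch of one such draw, mirroring single_ols_resample (illustrative; reuses the avg, std and kde fitted for the 20D term above):

In [ ]:
# one bootstrap draw: random half-width k, equidistant grid, KDE target density
k = np.random.uniform(0.5, 1.2)
grid = np.linspace(avg - 5*k*std, avg + 5*k*std, 200)
target_density = np.exp(kde.score_samples(grid.reshape(-1, 1)))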

Simply plot the slope parameter (delta_skew=False)


In [43]:
plot_skew_term_structure(ols_params, ols_param_std, delta_skew=False, ylim=(-0.5, 0))

Using 20 and 80 delta to compute skewness

In [17]:
plot_skew_term_structure(ols_params, ols_param_std, delta_skew=True, ylim=(-0.5, 0))

Drop the five longest terms from the plots

In [21]:
plot_skew_term_structure(ols_params.iloc[:-5, :], ols_param_std.iloc[:-5, :], delta_skew=True, ylim=(-0.5, 0))