In [1]:
import sys
import time
import json
import warnings
import numpy as np
import scipy as sp
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.stats import norm
from mpl_toolkits import mplot3d
from urllib.request import urlopen
from scipy.optimize import minimize
from sklearn.neighbors import KernelDensity
from pandas.plotting import register_matplotlib_converters
from scipy.optimize import basinhopping
register_matplotlib_converters()
warnings.filterwarnings('ignore')
In [2]:
def yahoo_parser(url):
    response = urlopen(url)
    data = json.loads(response.read().decode("utf-8"))['chart']['result'][0]
    t = pd.to_datetime(data['timestamp'], unit='s').date
    c = data['indicators']['quote'][0]['close']
    return pd.DataFrame({'close': c}, index=t)


def default_parser(url):
    return pd.read_csv(url, index_col=0, parse_dates=True)


def retrieve(source, code, label, column=None):
    query = {
        'yahoo': 'https://query1.finance.yahoo.com/v8/finance/chart/{}?period1=0&period2=5000000000&interval=1d',
    }
    parser = {

        'yahoo': yahoo_parser,
    }
    url = query.get(source).format(code)
    df = parser.get(source, default_parser)(url)
    df.index.name = None
    df.columns = [col.lower() for col in df.columns] if df.shape[1] > 1 else [label]
    if not column: column = df.columns
    all_dates = pd.date_range(df.index.min(), df.index.max(), freq='D')
    df = df.replace('.', None).reindex(all_dates).fillna(method='ffill')
    return df.astype('float')[column].squeeze()

def sigma_func_linear(x, a, b):
    return a * x + b

def sigma_func_quad(x, a, b, c):
    return a * x**2 + b * x + c

def sigma_func_quad_diff(x, a, b, c, a2):
    return a * x**2 * (x >= 0) + a2 * x**2 * (x < 0) + b * x + c

def SVIvol(params, x):
    a, b, m, rho, sigma = params
    return np.sqrt(a + b * (rho * (x - m) + np.sqrt((x - m)**2 + sigma**2)))

def SVIfail(params, T):
    a, b, m, rho, sigma = params
    return (abs(rho)>1) | (b<0) | (sigma<0) | (a+b*np.sqrt(1-rho**2)*sigma<0) | (b*(1+abs(rho)) > np.minimum(2/T,2))

def calc_call_premium(K, T, sigma):
    sigma = np.where(sigma>0, sigma, 1e-4)
    d1 = (np.log(1/K) + sigma**2 / 2 * T) / (sigma * np.sqrt(T))
    d2 = (np.log(1/K) - sigma**2 / 2 * T) / (sigma * np.sqrt(T))
    c = norm.cdf(d1) - K * norm.cdf(d2)
    return c
In [3]:
def calc_density(x, r, T, params, delta_x=0.001, method='linear'):
    """
    Approximate the density of x = log(K) using butterfly
    :param x: float / np.array(dtype=float), log return
    :param r: float
    :param params: array-like object, 
        linear: a, b
        quad: a, b, c
        quad_diff: a, b, c, a2
        svi: a, b, m, rho, sigma
    :delta_x: float, default 0.001
    :method: str, the analytical form of volatility function, can be 'linear'/'quad'/'quad_diff'/'svi'
    
    :return: float / np.array(dtype=float), a series of density
    """
    # params: a, b, c, a2
    if method == 'linear' and len(params) == 2:
        x1, x2, a, b = x + np.log(1 + delta_x), x + np.log(1 - delta_x), params[0], params[1]
        sigma1, sigma, sigma2 = a * x1 + b,\
                                a * x + b,\
                                a * x2 + b
    elif method == 'quad' and len(params) == 3:
        x1, x2, a, b, c = x + np.log(1 + delta_x), x + np.log(1 - delta_x), params[0], params[1], params[2]
        sigma1, sigma, sigma2 = a * x1**2 + b * x1 + c,\
                                a * x**2 + b * x + c,\
                                a * x2**2 + b * x2 + c
    elif method == 'quad_diff' and len(params) == 4:
        x1, x2, a, b, c, a2 = x + np.log(1 + delta_x), x + np.log(1 - delta_x), params[0], params[1], params[2], params[3]
        sigma1, sigma, sigma2 = a * x1**2 * (x1 >= 0) + a2 * x1**2 * (x1 < 0) + b * x1 + c,\
                                a * x**2 * (x >= 0) + a2 * x**2 * (x < 0) + b * x + c,\
                                a * x2**2 * (x2 >= 0) + a2 * x2**2 * (x2 < 0) + b * x2 + c
    elif method == 'svi' and len(params) == 5:
        x1, x2 = x + np.log(1 + delta_x), x + np.log(1 - delta_x)
        sigma1, sigma, sigma2 = SVIvol(params, x1), SVIvol(params, x), SVIvol(params, x2)
    else:
        raise Exception("Wrong param dimension or fit method!")

    f = np.exp(r * T) * (calc_call_premium(np.exp(x1), T, sigma1)
                         - 2 * calc_call_premium(np.exp(x), T, sigma)
                         + calc_call_premium(np.exp(x2), T, sigma2)) / (np.exp(x) * delta_x**2)
    return f

def obj_func_least_square(params, r_series, r_density, term_day, method):
    """
    Compute mean of squared errors (avg(SSE)) given the true density
    :param params: parameters in volatility function
    :r_series: array-like, the input in the volatility function, used to predict density
    :r_density: array-like, true density
    :term_day: int, T
    :method: str, the analytical form of volatility function
    """
    density = calc_density(r_series, r=0, T=term_day/260, params=params, delta_x=0.001, method=method)
    return np.mean((r_density - density)**2)

Resample

In [4]:
def single_ols_resample(df, term_day, sample_size, max_trial, equidist=True, verbose=0):
    """
    Given a term_day, using different samples to solve for the optimal parameters, return all the results.
    
    :param df: pd.DataFrame, return of SPX
    :param term_day: int
    :param sample_size: int, how many points will each sample contain
    :param max_trial: how many different samples will be generated
    :param equidist: bool, whether a sample contains equidistant points
    :param verbose: 0, 1, or 2
        if verbose = 0, do not print any results of the optimizer
        if verbose = 1, print optimizer.success
        if verbose = 2, print the whole optimizer object
    """
    col_names = ['linear_a', 'linear_b', 'quad_a', 'quad_b', 'quad_c', 'quad_diff_a1', 'quad_diff_b',
                 'quad_diff_c', 'quad_diff_a2']
    result = pd.DataFrame()
    full_series = df[str(term_day)+'D_ret'].dropna()
    avg, std = full_series.mean(), full_series.std()
    bandwidth = round(1.06 * std * (len(full_series))**(-0.2), 4)
    kde = KernelDensity(bandwidth=bandwidth)
    kde.fit(full_series.to_frame())
    for n in range(max_trial):
        if equidist:
            rand = np.random.uniform(0.5, 1.2)
            r_series = np.array([elem for elem in np.linspace(avg-5*rand*std, avg+5*rand*std, sample_size)])
        else:
            r_series = np.random.choice(full_series, sample_size)
        coef_linear = np.array([0.0, 0.0])
        coef_quad = np.array([0.0, 0.0, 0.0])
        coef_quad_diff = np.array([0.0, 0.0, 0.0, 0.0])
        r_density = np.array([elem for elem in np.exp(kde.score_samples(r_series.reshape(-1, 1)))])
        linear = minimize(obj_func_least_square, (-1, 1), args=(r_series, r_density, term_day, 'linear'),
                               method="BFGS")
        quad = minimize(obj_func_least_square, (1, -1, 1), args=(r_series, r_density, term_day, 'quad'),
                             method="BFGS")
        quad_diff = minimize(obj_func_least_square, (1, -1, 1, 1), args=(r_series, r_density, term_day, 'quad_diff'),
                                  method="BFGS")
        if verbose == 1:
            print("Trial " + str(n) + ":", linear.success, quad.success, quad_diff.success)
        elif verbose == 2:
            print('Trial ' + str(n) + ":")
            print(linear)
            print(quad)
            print(quad_diff)
        tmp = pd.DataFrame([np.hstack([linear.x, quad.x, quad_diff.x])], columns=col_names)
        result = pd.concat([result, tmp], axis=0, ignore_index=True)
    return result

def full_ols_resample(df, terms, sample_size, max_trial, equidist=True, save_to_file=False):
    col_names = ['linear_a', 'linear_b', 'quad_a', 'quad_b', 'quad_c', 'quad_diff_a1', 'quad_diff_b',
                 'quad_diff_c', 'quad_diff_a2']
    """
    Resample and compute the avg, std for each term_day
    
    :param df: pd.DataFrame, period return of SPX
    :param terms: dict, {term_str: term_day}
    :sample_size: int, how many points will each sample contain
    :param max_trial: how many different samples will be generated
    :param equidist: bool, whether a sample contains equidistant points
    :param save_to_file: whether each result will be saved
    """
    params_avg = []
    params_std = []
    num_window = len(terms)
    for term_str, term_day in terms.items():
        result = single_ols_resample(df, term_day=term_day, sample_size=100, max_trial=40)
        if save_to_file:
            result.to_csv("OLS_opt_params/100_40/"+term_str+"_params.csv")
        tmp_params = result.mean().values.tolist()
        params_avg.append(tmp_params)
        params_std.append(result.std().values.tolist())
    avg_result = pd.DataFrame(params_avg)
    std_result = pd.DataFrame(params_std)
    avg_result.columns = std_result.columns = col_names
    avg_result.index = std_result.index = terms.keys()
    return avg_result, std_result

Compute the skewness using 20 delta

In [43]:
def delta_skewness(params, T, method='linear'):
    d1, d2 = norm.ppf(0.2), norm.ppf(0.8)
#     K1, K2 = np.exp(x1), np.exp(x2)
    if method == 'linear':
        sigma_atm = params[1]
        x1, x2 = [(sigma_atm * np.sqrt(T) / 2 - d) * sigma_atm * np.sqrt(T) for d in [d1, d2]]
        y1, y2 = [params[0] * x + params[1] for x in [x1, x2]]
    elif method == 'quad':
        sigma_atm = params[2]
        x1, x2 = [(sigma_atm * np.sqrt(T) / 2 - d) * sigma_atm * np.sqrt(T) for d in [d1, d2]]
        y1, y2 = [params[0] * x**2 + params[1] * x + params[2] for x in [x1, x2]]
    elif method == 'quad_diff':
        sigma_atm = params[2]
        x1, x2 = [(sigma_atm * np.sqrt(T) / 2 - d) * sigma_atm * np.sqrt(T) for d in [d1, d2]]
        y1, y2 = [(params[0] * (x >= 0) + params[3] * (x < 0)) * x**2 + params[1] * x + params[2] for x in [x1, x2]]
    elif method == 'svi':
        a, b, m, rho, sigma = params
        sigma_atm = np.sqrt(a + b * (np.sqrt(m**2 + sigma**2) - rho * m))
        x1, x2 = [(sigma_atm * np.sqrt(T) / 2 - d) * sigma_atm * np.sqrt(T) for d in [d1, d2]]
        y1, y2 = [np.sqrt(a + b * (rho * (x - m) + np.sqrt((x - m)**2 + sigma**2))) for x in [x1, x2]]
    return (y1 - y2) / (x1 - x2)




def plot_skew_term_structure(params_avg, params_std=None, legend=['whole_regime'], delta_skew=True):
    """
    :params_avg: list of pd.DataFrame, mean of optimal parameters (pd.DataFrame) for different regimes
        each DataFrame: index, ['5D', '10D', ...]; columns: ['linear_a', ..., 'quad_diff_a2']
        If a single DataFrame is inputed, automatically transform it to a list.
    :params_std: default None, list of pd.DataFrame, standard deviation of optimal parameters for different regimes
        each DataFrame: index, ['5D', '10D', ...]; columns: ['linear_a', ..., 'quad_diff_a2']
        If a single DataFrame is inputed, automatically transform it to a list.
    :param legend: input of plt.legend(), for example can be ['high_vol regime', 'mid_vol regime', 'low_vol regime']
    :param delta_skew: bool, how to compute skewness
        If True, use the slope between 20 delta of call and put, delta_vol / delta_K, 
                where K1, K2 = exp(\Phi^{-1}(x)), x = 0.2 and 0.8
        if False, simply plot the first derivative of the vol curve, delta_vol / delta_ln(K)
    """
    if not isinstance(params_avg, list):
        params_avg = [params_avg]
    if params_std is not None and not isinstance(params_std, list):
        params_std = [params_std]
    x_axis = [int(str(i)[:-1]) for i in params_avg[0].index.tolist()]
    T = np.reshape(np.array(x_axis), (1, -1)) / 260
    colors = ['#C75B56', '#6C8DBF', '#60854B']

    plt.figure(figsize=(18, 5))
    plt.subplot(1, 3, 1)
    for i in range(len(params_avg)):
        params = (params_avg[i].values[:, :2]).T
        if delta_skew:
            y = list(delta_skewness(params, T, method='linear')[0])
            plt.plot(x_axis, y, color=colors[i])
        else:
            plt.plot(x_axis, params[0], color=colors[i])
        if params_std is not None:
            tmp = (2 * params_std[i].values[:, :2]).T
            if delta_skew:
                y_lower = list(delta_skewness(params-tmp, T, method='linear')[0])
                y_upper = list(delta_skewness(params+tmp, T, method='linear')[0])
            else:
                y_lower = params[0] - tmp[0]
                y_upper = params[0] + tmp[0]
                plt.fill_between(x_axis, y_lower, y_upper, color=colors[0], alpha=0.2)
    plt.legend(legend, frameon=False)
    plt.title("term structure of skewness (linear fit)")
    plt.xlabel('term')
    plt.ylabel('skewness')
    plt.ylim(-0.9, 0.2)

    plt.subplot(1, 3, 2)
    for i in range(len(params_avg)):
        params = (params_avg[i].values[:, 2:5]).T
        if delta_skew:
            y = list(delta_skewness(params, T, method='quad')[0])
            plt.plot(x_axis, y, color=colors[i])
        else:
            plt.plot(x_axis, params[1], color=colors[i])
        if params_std is not None:
            tmp = (2 * params_std[i].values[:, 2:5]).T
            if delta_skew:
                y_lower = list(delta_skewness(params-tmp, T, method='quad')[0])
                y_upper = list(delta_skewness(params+tmp, T, method='quad')[0])
            else:
                y_lower = params[1] - tmp[1]
                y_upper = params[1] + tmp[1]
            plt.fill_between(x_axis, y_lower, y_upper, color=colors[i], alpha=0.2)
    plt.legend(legend, frameon=False)
    plt.title("term structure of skewness (quad fit)")
    plt.xlabel('term')
    plt.ylabel('skewness')
    plt.ylim(-0.9, 0.2)

    plt.subplot(1, 3, 3)
    for i in range(len(params_avg)):
        params = (params_avg[i].values[:, 5:9]).T
        if delta_skew:
            y = list(delta_skewness(params, T, method='quad_diff')[0])
            plt.plot(x_axis, y, color=colors[i])
        else:
            plt.plot(x_axis, params[1], color=colors[i])
        if params_std is not None:
            tmp = 2 * (params_std[i].values[:, 5:9]).T
            if delta_skew:
                y_lower = list(delta_skewness(params-tmp, T, method='quad_diff')[0])
                y_upper = list(delta_skewness(params+tmp, T, method='quad_diff')[0])
            else:
                y_lower = params[1] - tmp[1]
                y_upper = params[1] + tmp[1]
            plt.fill_between(x_axis, y_lower, y_upper, color=colors[i], alpha=0.2)
    plt.legend(legend, frameon=False)
    plt.title("term structure of skewness (quad_diff fit)")
    plt.xlabel('term')
    plt.ylabel('skewness')
    plt.ylim(-0.9, 0.2)
In [6]:
raw_df = retrieve('yahoo', '^GSPC', 'SPX').to_frame()
raw_df.index = pd.to_datetime(raw_df.index)
df = raw_df.copy()
df = df.reindex(pd.date_range(start="1970-01-02", end="2018-12-31", freq='B'))
df['log_SPX'] = np.log(df["SPX"])
df['r'] = df['log_SPX'].diff()
term_days = [5, 10] + list(range(20, 521, 20))
term_name = [str(i) + "D" for i in term_days]
terms = dict(zip(term_name, term_days))
raw_df.head(5)
# for k, v in terms.items(): 
#     df[k + '_ret'] = df["r"].rolling(v).sum()
Out[6]:
SPX
1970-01-02 93.000000
1970-01-03 93.000000
1970-01-04 93.000000
1970-01-05 93.459999
1970-01-06 92.820000
In [7]:
df1=df.copy()
mov_vol = df1['r'].rolling(260).std()*(260)**0.5
fig = plt.figure(figsize=(10, 8))
mov_vol.plot(color='#C75B56')
plt.axhline(y=0.12, color='black')
plt.axhline(y=0.22, color='black')
plt.text(-800, 0.12, "0.12", bbox=dict(facecolor="w",alpha=0.5),fontsize=15)
plt.text(-800, 0.22, "0.22", bbox=dict(facecolor="w",alpha=0.5),fontsize=15)
plt.title('Volatility Regime')
plt.xlabel('day');
plt.ylabel('volatility');

Compute forward return

In [8]:
for k, v in terms.items():
    df1[k + '_ret'] = (df1['r'].rolling(v).sum()).shift(-v)
    df1[k + '_ret'] = df1[k + '_ret']- np.log(np.mean(np.exp(df1[k + '_ret'])))
df1.dropna(inplace=True)
df1.tail(5)
Out[8]:
SPX log_SPX r 5D_ret 10D_ret 20D_ret 40D_ret 60D_ret 80D_ret 100D_ret ... 340D_ret 360D_ret 380D_ret 400D_ret 420D_ret 440D_ret 460D_ret 480D_ret 500D_ret 520D_ret
2016-12-27 2268.879883 7.727042 0.002246 -0.006424 -0.003046 -0.001253 0.029160 0.013801 0.006594 0.024597 ... 0.066233 0.061898 0.083050 0.079261 0.095752 0.102773 0.105766 0.013562 0.007150 -0.130633
2016-12-28 2249.919922 7.718650 -0.008392 0.007673 0.008171 0.015133 0.036469 0.024081 0.013267 0.014643 ... 0.075456 0.074342 0.087407 0.080534 0.103881 0.108358 0.114869 0.032747 0.038256 -0.073838
2016-12-29 2249.260010 7.718357 -0.000293 0.007196 0.006317 0.014691 0.037182 0.023313 0.021090 0.018616 ... 0.070006 0.073779 0.090169 0.089538 0.102732 0.104992 0.106960 0.043543 0.036364 -0.065019
2016-12-30 2238.830078 7.713709 -0.004648 0.015354 0.012813 0.018472 0.043322 0.027117 0.022698 0.030009 ... 0.066081 0.075791 0.093799 0.095264 0.100241 0.107424 0.106064 0.041854 0.049149 -0.061613
2017-01-02 2238.830078 7.713709 0.000000 0.011799 0.012813 0.012444 0.044339 0.026097 0.033480 0.035156 ... 0.066137 0.083151 0.091671 0.094236 0.096227 0.109320 0.105669 0.047438 0.060031 -0.053157

5 rows × 31 columns

Run 3 csv (take a long time to resample)

In [10]:
cutoff=[0.12,0.22]
low=df1[mov_vol<=cutoff[0]]
#print(low)
#print(len(low))
medium=df1[(mov_vol>cutoff[0]) & (mov_vol<cutoff[1])]
#print(len(medium))
high=df1[mov_vol>=cutoff[1]]
#print(len(high))

regime=[low,medium,high]
names=["low","medium","high"]
# ols_params_low, ols_param_std_low = full_ols_resample(low, terms, 200, 40, save_to_file=False)
# ols_params_medium, ols_param_std_medium = full_ols_resample(medium, terms, 200, 40, save_to_file=False)
ols_params_high, ols_param_std_high = full_ols_resample(high, terms, 200, 40, save_to_file=False)

# ols_params_low.to_csv("ols_resample_params_low.csv")
#ols_param_std_low.to_csv("ols_resample_std_low.csv")
#ols_params_medium.to_csv("ols_resample_params_medium.csv")
#ols_param_std_medium.to_csv("ols_resample_std_medium.csv")
ols_params_high.to_csv("ols_resample_params_high.csv")
ols_param_std_high.to_csv("ols_resample_std_high.csv")
In [44]:
ols_params=[]
ols_params.append(pd.read_csv("ols_resample_params_low.csv",index_col=0).iloc[:-5, :])

ols_params.append(pd.read_csv("ols_resample_params_medium.csv",index_col=0).iloc[:-5, :])
ols_params.append(pd.read_csv("ols_resample_params_high.csv",index_col=0).iloc[:-5, :])

ols_params_std=[]
ols_params_std.append(pd.read_csv("ols_resample_std_low.csv",index_col=0).iloc[:-5, :])

ols_params_std.append(pd.read_csv("ols_resample_std_medium.csv",index_col=0).iloc[:-5, :])
ols_params_std.append(pd.read_csv("ols_resample_std_high.csv",index_col=0).iloc[:-5, :])
plot_skew_term_structure(ols_params, ols_params_std, legend=['low','medium','high'],delta_skew=True)
In [45]:
plot_skew_term_structure(ols_params, ols_params_std, legend=['low','medium','high'],delta_skew=False)
In [23]:
def draw_vol_curve_diffregimes(df,window,ylim_down=-0.5,ylim_up=2):
    fig = plt.figure(figsize=(30, 8))
    index = 1
    coefs_linear_ols = []
    coefs_quad_ols = []
    coefs_quad_diff_ols = []

    df1=df.copy()
    mov_vol = df1['r'].rolling(260).std()*(260)**0.5
    df1[str(window)+'D_rtn_forward']=(df1['r'].rolling(window).sum()).shift(-window)
    df1[str(window)+'D_rtn_forward'] = df1[str(window)+'D_rtn_forward']- np.log(np.mean(np.exp(df1[str(window)+'D_rtn_forward'])))
    cutoff=[0.12,0.22]
    df1.dropna(inplace=True)
    low=df1[mov_vol<cutoff[0]][str(window)+'D_rtn_forward']
    print(len(low))
    medium=df1[(mov_vol>cutoff[0]) & (mov_vol<cutoff[1])][str(window)+'D_rtn_forward']
    print(len(medium))
    high=df1[mov_vol>cutoff[1]][str(window)+'D_rtn_forward']
    print(len(high))


    regime=[low,medium,high]
    names=["low","medium","high"]
    name_index=0
    for i in regime:
        avg, std = i.mean(), i.std()
        bandwidth = round(1.06 * std * (len(i))**(-0.2), 4)
        r_series = np.array([round(elem, 4) for elem in np.arange(avg - 5 * std, avg + 5 * std, 0.2 * std)])
        kde = KernelDensity(bandwidth = bandwidth)
        kde.fit(i.to_frame())
        r_density = np.array([round(elem, 4) for elem in np.exp(kde.score_samples(np.array(r_series).reshape(-1, 1)))])
        coef_linear_ols = minimize(obj_func_least_square, (-1, 1), args=(r_series, r_density, window, 'linear'), method="BFGS")
        coef_quad_ols = minimize(obj_func_least_square, (1, -1, 1), args=(r_series, r_density, window, 'quad'), method="BFGS")
        coef_quad_diff_ols = minimize(obj_func_least_square, (1, -1, 1, 1), args=(r_series, r_density, window, 'quad_diff'),
                                      method="BFGS")
        coefs_linear_ols.append(coef_linear_ols.x)
        coefs_quad_ols.append(coef_quad_ols.x)
        coefs_quad_diff_ols.append(coef_quad_diff_ols.x)
        linear_fit_ols = np.round(calc_density(r_series, r=0, T=window/260, params=coef_linear_ols.x, delta_x=0.01, method='linear'), 4)
        quad_fit_ols = np.round(calc_density(r_series, r=0, T=window/260, params=coef_quad_ols.x, delta_x=0.01, method='quad'), 4)
        quad_diff_fit_ols = np.round(calc_density(r_series, r=0, T=window/260, params=coef_quad_diff_ols.x, delta_x=0.01, method='quad_diff'), 4)

        #plt.subplot(num_window*3, 3, index)
    #     plt.subplot(3,1, index)
    #     plt.hist(i, bins=100, density=True)
    #     plt.plot(r_series, linear_fit_ols)
    #     plt.plot(r_series, quad_fit_ols)
    #     plt.plot(r_series, quad_diff_fit_ols)
    #     plt.legend(['linear fit', 'quad fit', 'quad diff fit', 'empirical density'])
    #     plt.title("term_str"+" fitted density"+names[name_index])
    #     name_index+=1
    #     index += 1

    fig = plt.figure(figsize=(18, 5))
    index = 1
    colors=['#C75B56','#6C8DBF','#60854B']
    for index in range(3):
        plt.subplot(1, 3, index+1)
        x = np.linspace(-0.5, 0.5, 41)
        a, b = coef_linear_ols = coefs_linear_ols[index]
        plt.plot(x, a*x+b,color=colors[0])
        a, b, c = coefs_quad_ols[index]
        plt.plot(x, a*x**2 + b*x + c,color=colors[1])
        a, b, c, a2 = coefs_quad_diff_ols[index]
        plt.plot(x, (a*(x>=0)+a2*(x<0))*x**2+b*x+c,color=colors[2])
        plt.legend(['linear', 'quad', 'quad diff'])
        plt.title(str(window)+"D volatility plot "+names[index%3])
        plt.ylim((ylim_down,ylim_up))
        plt.xlabel("log moneyness(K/S)")
        plt.ylabel("volatility");
        index += 1
    plt.show();
In [24]:
draw_vol_curve_diffregimes(df,20);
4506
6543
1453
<Figure size 2160x576 with 0 Axes>
In [25]:
draw_vol_curve_diffregimes(df,260,-0.2,1);
4436
6373
1453
<Figure size 2160x576 with 0 Axes>
In [ ]: