Create multiple columns in Pandas Dataframe from one function

前端 未结 3 1911
失恋的感觉
失恋的感觉 2021-02-08 00:10

I'm a Python newbie, so I hope my two questions are clear and complete. I posted the actual code and a test data set in CSV format below.

I've been able to construct t…

3条回答
  •  悲哀的现实
    2021-02-08 00:49

    The trick to vectorize code is to not think in terms of rows, but instead think in terms of columns.

    I almost have this working (I'll try to finish it later), but you want to do something along the lines of this:

    from datetime import datetime
    from math import sqrt, pi, log, exp, isnan
    from numpy import inf, nan
    from scipy.stats import norm
    import pandas as pd
    from pandas import Timestamp
    from pandas.tseries.holiday import USFederalHolidayCalendar
    
    # Model constants shared by the calculations that follow.
    threshold = 1e-9                               # convergence tolerance
    two_pi = pi * 2                                # cached so 2 * pi is evaluated once
    rf = 0.0015                                    # Fed Funds Rate, see https://research.stlouisfed.org/fred2/data/DFF.csv
    cal = USFederalHolidayCalendar()               # US Federal holiday calendar
    # NOTE(review): 450 encodes a 7.5-hour trading day -- confirm this matches
    # the session length of the market being modelled.
    tradingMinutesDay = 450                        # 7.5 hours * 60 minutes
    tradingMinutesAnnum = 252 * tradingMinutesDay  # 252 trading days per year -> 113400
    
    # Create sample data: eight AAPL option quotes (alternating calls and puts)
    # captured at a single timestamp and spread over two expiries.
    col_order = [
        'TimeStamp', 'OpraSymbol', 'RootSymbol', 'Expiry', 'Strike', 'OptType',
        'RootPrice', 'Last', 'Bid', 'Ask', 'Volume', 'OpenInt', 'IV',
    ]
    quote_time = Timestamp('2015-09-30 16:00:00')
    oct_expiry = Timestamp('2015-10-16 16:00:00')
    nov_expiry = Timestamp('2015-11-20 16:00:00')
    # Every column is a {row_label: value} mapping keyed 0..7; the trailing
    # [col_order] selection fixes the column order.
    df = pd.DataFrame({
        'TimeStamp': dict(enumerate([quote_time] * 8)),
        'OpraSymbol': dict(enumerate([
            'AAPL151016C00109000', 'AAPL151016P00109000',
            'AAPL151016C00110000', 'AAPL151016P00110000',
            'AAPL151016C00111000', 'AAPL151016P00111000',
            'AAPL151120C00110000', 'AAPL151120P00110000',
        ])),
        'RootSymbol': dict(enumerate(['AAPL'] * 8)),
        'Expiry': dict(enumerate([oct_expiry] * 6 + [nov_expiry] * 2)),
        'Strike': dict(enumerate([109.0, 109.0, 110.0, 110.0, 111.0, 111.0, 110.0, 110.0])),
        'OptType': dict(enumerate(['C', 'P', 'C', 'P', 'C', 'P', 'C', 'P'])),
        'RootPrice': dict(enumerate([109.95] * 8)),
        'Last': dict(enumerate([3.46, 2.34, 3.0, 2.81, 2.35, 3.20, 5.90, 6.15])),
        'Bid': dict(enumerate([3.6, 2.34, 2.86, 2.74, 2.44, 3.1, 5.7, 6.1])),
        'Ask': dict(enumerate([3.7, 2.42, 3.0, 2.8, 2.45, 3.25, 5.95, 6.3])),
        'Volume': dict(enumerate([1565.0, 3790.0, 10217.0, 12113.0, 6674.0, 2031.0, 5330.0, 3724.0])),
        'OpenInt': dict(enumerate([1290.0, 3087.0, 28850.0, 44427.0, 2318.0, 3773.0, 17112.0, 15704.0])),
        'IV': dict(enumerate([0.3497, 0.3146, 0.3288, 0.3029, 0.3187, 0.2926, 0.3635, 0.3842])),
    })[col_order]
    
    # Derived per-row inputs, each produced by a whole-column operation.
    option_sign = {'C': 1, 'P': -1}          # +1 for calls, -1 for puts
    price_over_strike = df['RootPrice'] / df['Strike']

    df['mark'] = (df['Bid'] + df['Ask']) * 0.5   # bid/ask midpoint
    df['cp'] = df['OptType'].map(option_sign)
    df['Log_S_K'] = price_over_strike.map(log)

    # Placeholder columns that later steps overwrite.
    # TODO: Get dividend value (divs is a constant 0 for now).
    for column, initial in (('divs', 0), ('vega', 0.0), ('converged', False)):
        df[column] = initial
    
    # Vectorized datetime calculations.
    # The business-day and holiday counts depend only on the
    # (TimeStamp, Expiry) pair, so each distinct pair is evaluated once and the
    # result is then looked up for every row.
    row_pairs = list(zip(df['TimeStamp'], df['Expiry']))
    total_days = {}
    hols = {}
    for pair in set(row_pairs):
        start, end = pair
        total_days[pair] = len(pd.bdate_range(start, end))
        hols[pair] = len(cal.holidays(start, end))

    business_days = pd.Series([total_days[p] for p in row_pairs], index=df.index)
    holiday_counts = pd.Series([hols[p] for p in row_pairs], index=df.index)

    # Business days minus holidays minus one, floored at zero.
    df['days_to_exp'] = (business_days - holiday_counts - 1).clip(lower=0)
    # Convert days to a year fraction via trading minutes.
    df['years_to_expiry'] = df['days_to_exp'] * tradingMinutesDay / tradingMinutesAnnum
    
    # Initial implied vol 'guess': sqrt(2*pi / T) * mark / S, where T is
    # years_to_expiry and S is RootPrice.  Rows with T == 0 divide by zero here.
    time_scale = (two_pi / df['years_to_expiry']) ** 0.5
    df['implied_vol'] = time_scale * df['mark'] / df['RootPrice']
    
    # Newton-Raphson refinement of implied_vol: every pass re-prices the options
    # that have not converged yet and moves their vol by (model - mark) / vega.
    # At most 100 passes are made.
    for i in range(100):  # range() runs on Python 2 and 3; xrange is Python-2 only
        # Mask of options where the vol has not converged; stop once none remain.
        mask = ~df['converged'].astype(bool)
        if not mask.any():
            break

        # Aliases for the rows still being iterated.
        data = df.loc[mask, :]
        cp = data['cp']
        mark = data['mark']
        S = data['RootPrice']
        K = data['Strike']
        d = data['divs']
        T = data['years_to_expiry']
        log_S_K = data['Log_S_K']
        iv = data['implied_vol']
        sqrt_T = T ** 0.5  # used three times below

        # Calcs.  norm.pdf / norm.cdf accept whole arrays, so each is called
        # once per pass on the column values rather than once per row.
        d1 = (log_S_K + T * (rf - d + .5 * iv ** 2)) / (iv * sqrt_T)
        d2 = d1 - iv * sqrt_T
        vega = S * norm.pdf(d1.values) * sqrt_T
        discounted_K = K * (-rf * T).apply(exp)
        model = cp * (S * norm.cdf((cp * d1).values)
                      - discounted_K * norm.cdf((cp * d2).values))
        iv_delta = (model - mark) / vega
        df.loc[mask, 'vega'] = vega
        df.loc[mask, 'implied_vol'] = iv - iv_delta

        # Clean-up and check for convergence.
        df.loc[df['implied_vol'] < 0, 'implied_vol'] = 0  # vol is floored at zero
        # A row counts as converged when the price computed from the vol it
        # entered this pass with is within `threshold` of the mark.
        idx = model[(model - mark).abs() < threshold].index
        df.loc[idx, 'converged'] = True  # label-based; DataFrame.ix no longer exists
        # Assign the cleaned columns back explicitly: an inplace call made on a
        # df.loc[...] selection may only modify a temporary object.
        # NOTE(review): NaN -> 0 runs before +/-inf -> NaN, so an infinite value
        # ends the pass as NaN; confirm that this order is intended.
        for col in ('implied_vol', 'vega'):
            df[col] = df[col].fillna(0).replace([inf, -inf], nan)
    

提交回复
热议问题