问题
I was defining a function Heiken Ashi which is one of the popular chart type in Technical Analysis. I was writing a function on it using Pandas but finding little difficulty. This is how Heiken Ashi [HA] looks like-
Heikin-Ashi Candle Calculations
HA_Close = (Open + High + Low + Close) / 4
HA_Open = (previous HA_Open + previous HA_Close) / 2
HA_Low = minimum of Low, HA_Open, and HA_Close
HA_High = maximum of High, HA_Open, and HA_Close
Heikin-Ashi Calculations on First Run
HA_Close = (Open + High + Low + Close) / 4
HA_Open = (Open + Close) / 2
HA_Low = Low
HA_High = High
There is a lot of stuff available on various websites using for loop and pure python but i think Pandas can also do job well. This is my progress-
def HA(df):
df['HA_Close']=(df['Open']+ df['High']+ df['Low']+ df['Close'])/4
ha_o=df['Open']+df['Close'] #Creating a Variable
#(for 1st row)
HA_O=df['HA_Open'].shift(1)+df['HA_Close'].shift(1) #Another variable
#(for subsequent rows)
df['HA_Open']=[ha_o/2 if df['HA_Open']='nan' else HA_O/2]
#(error Part Where am i going wrong?)
df['HA_High']=df[['HA_Open','HA_Close','High']].max(axis=1)
df['HA_Low']=df[['HA_Open','HA_Close','Low']].min(axis=1)
return df
Can Anyone Help me with this please?` It doesnt work.... I tried on this-
import pandas_datareader.data as web
import HA
import pandas as pd
start='2016-1-1'
end='2016-10-30'
DAX=web.DataReader('^GDAXI','yahoo',start,end)
This is the New Code i wrote
def HA(df):
df['HA_Close']=(df['Open']+ df['High']+ df['Low']+df['Close'])/4
...: ha_o=df['Open']+df['Close']
...: df['HA_Open']=0.0
...: HA_O=df['HA_Open'].shift(1)+df['HA_Close'].shift(1)
...: df['HA_Open']= np.where( df['HA_Open']==np.nan, ha_o/2, HA_O/2 )
...: df['HA_High']=df[['HA_Open','HA_Close','High']].max(axis=1)
...: df['HA_Low']=df[['HA_Open','HA_Close','Low']].min(axis=1)
...: return df
But still the HA_Open result was not satisfactory
回答1:
Here is the fastest, accurate and efficient implementation as per my tests:
def HA(df):
df['HA_Close']=(df['Open']+ df['High']+ df['Low']+df['Close'])/4
idx = df.index.name
df.reset_index(inplace=True)
for i in range(0, len(df)):
if i == 0:
df.set_value(i, 'HA_Open', ((df.get_value(i, 'Open') + df.get_value(i, 'Close')) / 2))
else:
df.set_value(i, 'HA_Open', ((df.get_value(i - 1, 'HA_Open') + df.get_value(i - 1, 'HA_Close')) / 2))
if idx:
df.set_index(idx, inplace=True)
df['HA_High']=df[['HA_Open','HA_Close','High']].max(axis=1)
df['HA_Low']=df[['HA_Open','HA_Close','Low']].min(axis=1)
return df
Here is my test algorithm (essentially I used the algorithm provided in this post to benchmark the speed results):
import quandl
import time
df = quandl.get("NSE/NIFTY_50", start_date='1997-01-01')
def test_HA():
print('HA Test')
start = time.time()
HA(df)
end = time.time()
print('Time taken by set and get value functions for HA {}'.format(end-start))
start = time.time()
df['HA_Close_t']=(df['Open']+ df['High']+ df['Low']+df['Close'])/4
from collections import namedtuple
nt = namedtuple('nt', ['Open','Close'])
previous_row = nt(df.ix[0,'Open'],df.ix[0,'Close'])
i = 0
for row in df.itertuples():
ha_open = (previous_row.Open + previous_row.Close) / 2
df.ix[i,'HA_Open_t'] = ha_open
previous_row = nt(ha_open, row.Close)
i += 1
df['HA_High_t']=df[['HA_Open_t','HA_Close_t','High']].max(axis=1)
df['HA_Low_t']=df[['HA_Open_t','HA_Close_t','Low']].min(axis=1)
end = time.time()
print('Time taken by ix (iloc, loc) functions for HA {}'.format(end-start))
Here is the output I got on my i7 processor (please note the results may vary depending on your processor speed but I assume that the results will be similar):
HA Test
Time taken by set and get value functions for HA 0.05005788803100586
Time taken by ix (iloc, loc) functions for HA 0.9360761642456055
My experience with Pandas shows that functions like ix
, loc
, iloc
are slower in comparison to set_value
and get_value
functions. Moreover computing value for a column on itself using shift
function gives erroneous results.
回答2:
I'm not that knowledgeable regarding Python, or Pandas, but after some research, this is what I could figure would be a good solution.
Please, feel free to add any comments. I very much appreciate.
I used namedtuples and itertuples (seem to be the fastest, if looping through a DataFrame).
I hope it helps!
def HA(df):
df['HA_Close']=(df['Open']+ df['High']+ df['Low']+df['Close'])/4
nt = namedtuple('nt', ['Open','Close'])
previous_row = nt(df.ix[0,'Open'],df.ix[0,'Close'])
i = 0
for row in df.itertuples():
ha_open = (previous_row.Open + previous_row.Close) / 2
df.ix[i,'HA_Open'] = ha_open
previous_row = nt(ha_open, row.Close)
i += 1
df['HA_High']=df[['HA_Open','HA_Close','High']].max(axis=1)
df['HA_Low']=df[['HA_Open','HA_Close','Low']].min(axis=1)
return df
回答3:
def heikenashi(df):
df['HA_Close'] = (df['Open'] + df['High'] + df['Low'] + df['Close']) / 4
df['HA_Open'] = (df['Open'].shift(1) + df['Open'].shift(1)) / 2
df.iloc[0, df.columns.get_loc("HA_Open")] = (df.iloc[0]['Open'] + df.iloc[0]['Close'])/2
df['HA_High'] = df[['High', 'Low', 'HA_Open', 'HA_Close']].max(axis=1)
df['HA_Low'] = df[['High', 'Low', 'HA_Open', 'HA_Close']].min(axis=1)
df = df.drop(['Open', 'High', 'Low', 'Close'], axis=1) # remove old columns
df = df.rename(columns={"HA_Open": "Open", "HA_High": "High", "HA_Low": "Low", "HA_Close": "Close", "Volume": "Volume"})
df = df[['Open', 'High', 'Low', 'Close', 'Volume']] # reorder columns
return df
回答4:
Will be faster with numpy.
def HEIKIN(O, H, L, C, oldO, oldC):
HA_Close = (O + H + L + C)/4
HA_Open = (oldO + oldC)/2
elements = numpy.array([H, L, HA_Open, HA_Close])
HA_High = elements.max(0)
HA_Low = elements.min(0)
out = numpy.array([HA_Close, HA_Open, HA_High, HA_Low])
return out
回答5:
Unfortunately, set_value(), and get_value() are deprecated. Building off arkochhar's answer, I was able to get a 75% speed increase by using the following list comprehension method with my own OHLC data (7000 rows of data). It is faster than using at and iat as well.
def HA( dataframe ):
df = dataframe.copy()
df['HA_Close']=(df.Open + df.High + df.Low + df.Close)/4
df.reset_index(inplace=True)
ha_open = [ (df.Open[0] + df.Close[0]) / 2 ]
[ ha_open.append((ha_open[i] + df.HA_Close.values[i]) / 2) \
for i in range(0, len(df)-1) ]
df['HA_Open'] = ha_open
df.set_index('index', inplace=True)
df['HA_High']=df[['HA_Open','HA_Close','High']].max(axis=1)
df['HA_Low']=df[['HA_Open','HA_Close','Low']].min(axis=1)
return df
回答6:
I adjusted the code to make it work with Python 3.7
def HA(df):
df_HA = df
df_HA['Close']=(df['Open']+ df['High']+ df['Low']+df['Close'])/4
#idx = df_HA.index.name
#df_HA.reset_index(inplace=True)
for i in range(0, len(df)):
if i == 0:
df_HA['Open'][i]= ( (df['Open'][i] + df['Close'][i] )/ 2)
else:
df_HA['Open'][i] = ( (df['Open'][i-1] + df['Close'][i-1] )/ 2)
#if idx:
#df_HA.set_index(idx, inplace=True)
df_HA['High']=df[['Open','Close','High']].max(axis=1)
df_HA['Low']=df[['Open','Close','Low']].min(axis=1)
return df_HA
回答7:
Perfectly working HekinAshi function. I am not the original author of this code. I found this on Github (https://github.com/emreturan/heikin-ashi/blob/master/heikin_ashi.py)
def heikin_ashi(df):
heikin_ashi_df = pd.DataFrame(index=df.index.values, columns=['open', 'high', 'low', 'close'])
heikin_ashi_df['close'] = (df['open'] + df['high'] + df['low'] + df['close']) / 4
for i in range(len(df)):
if i == 0:
heikin_ashi_df.iat[0, 0] = df['open'].iloc[0]
else:
heikin_ashi_df.iat[i, 0] = (heikin_ashi_df.iat[i-1, 0] + heikin_ashi_df.iat[i-1, 3]) / 2
heikin_ashi_df['high'] = heikin_ashi_df.loc[:, ['open', 'close']].join(df['high']).max(axis=1)
heikin_ashi_df['low'] = heikin_ashi_df.loc[:, ['open', 'close']].join(df['low']).min(axis=1)
return heikin_ashi_df
来源:https://stackoverflow.com/questions/40613480/heiken-ashi-using-pandas-python