问题
I'm using the linearmodels package to estimate a Panel-OLS. As an example see:
import numpy as np
from statsmodels.datasets import grunfeld
data = grunfeld.load_pandas().data
data.year = data.year.astype(np.int64)
# MultiIndex, entity - time
data = data.set_index(['firm','year'])
from linearmodels import PanelOLS
mod = PanelOLS(data.invest, data[['value','capital']], entity_effect=True)
res = mod.fit(cov_type='clustered', cluster_entity=True)
I want to export the regression's output in a .tex file. Is there a convenient way of formatting the output with confidence stars and without the other information like the CIs? The question has been asked in the context of a standard OLS in here but this does not apply for a 'PanelEffectsResults' object, since I get the following error:
'PanelEffectsResults' object has no attribute 'bse'
Thanks in advance.
回答1:
A bit late but here is what I use. In the example above I calculated two fixed effects regressions with their results stored in fe_res_VS
and fe_res_CVS
:
pd.set_option('precision', 4)
pd.options.display.float_format = '{:,.4f}'.format
Reg_Output_FAmount= pd.DataFrame()
#1)
Table1 = pd.DataFrame(fe_res_VS.params)
Table1['id'] = np.arange(len(Table1))#create numerical index for pd.DataFrame
Table1 = Table1.reset_index().set_index(keys = 'id')#set numercial index as new index
Table1 = Table1.rename(columns={"index":"parameter", "parameter":"coefficient 1"})
P1 = pd.DataFrame(fe_res_VS.pvalues)
P1['id'] = np.arange(len(P1))#create numerical index for pd.DataFrame
P1 = P1.reset_index().set_index(keys = 'id')#set numercial index as new index
P1 = P1.rename(columns={"index":"parameter"})
Table1 = pd.merge(Table1, P1, on='parameter')
Table1['significance 1'] = np.where(Table1['pvalue'] <= 0.01, '***',\
np.where(Table1['pvalue'] <= 0.05, '**',\
np.where(Table1['pvalue'] <= 0.1, '*', '')))
Table1.rename(columns={"pvalue": "pvalue 1"}, inplace=True)
SE1 = pd.DataFrame(fe_res_VS.std_errors)
SE1['id'] = np.arange(len(SE1))#create numerical index for pd.DataFrame
SE1 = SE1.reset_index().set_index(keys = 'id')#set numercial index as new index
SE1 = SE1.rename(columns={"index":"parameter", "std_error":"coefficient 1"})
SE1['parameter'] = SE1['parameter'].astype(str) + '_SE'
SE1['significance 1'] = ''
SE1 = SE1.round(4)
SE1['coefficient 1'] = '(' + SE1['coefficient 1'].astype(str) + ')'
Table1 = Table1.append(SE1)
Table1 = Table1.sort_values('parameter')
Table1.replace(np.nan,'', inplace=True)
del P1
del SE1
#2)
Table2 = pd.DataFrame(fe_res_CVS.params)
Table2['id'] = np.arange(len(Table2))#create numerical index for pd.DataFrame
Table2 = Table2.reset_index().set_index(keys = 'id')#set numercial index as new index
Table2 = Table2.rename(columns={"index":"parameter", "parameter":"coefficient 2"})
P2 = pd.DataFrame(fe_res_CVS.pvalues)
P2['id'] = np.arange(len(P2))#create numerical index for pd.DataFrame
P2 = P2.reset_index().set_index(keys = 'id')#set numercial index as new index
P2 = P2.rename(columns={"index":"parameter"})
Table2 = pd.merge(Table2, P2, on='parameter')
Table2['significance 2'] = np.where(Table2['pvalue'] <= 0.01, '***',\
np.where(Table2['pvalue'] <= 0.05, '**',\
np.where(Table2['pvalue'] <= 0.1, '*', '')))
Table2.rename(columns={"pvalue": "pvalue 2"}, inplace=True)
SE2 = pd.DataFrame(fe_res_CVS.std_errors)
SE2['id'] = np.arange(len(SE2))#create numerical index for pd.DataFrame
SE2 = SE2.reset_index().set_index(keys = 'id')#set numercial index as new index
SE2 = SE2.rename(columns={"index":"parameter", "std_error":"coefficient 2"})
SE2['parameter'] = SE2['parameter'].astype(str) + '_SE'
SE2['significance 2'] = ''
SE2 = SE2.round(4)
SE2['coefficient 2'] = '(' + SE2['coefficient 2'].astype(str) + ')'
Table2 = Table2.append(SE2)
Table2 = Table2.sort_values('parameter')
Table2.replace(np.nan,'', inplace=True)
del P2
del SE2
#Merging Tables and adding Stats
Reg_Output_FAmount= pd.merge(Table1, Table2, on='parameter', how='outer')
Reg_Output_FAmount = Reg_Output_FAmount.append(pd.DataFrame(np.array([["observ.", fe_res_VS.nobs, '', fe_res_CVS.nobs, '']]), columns=['parameter', 'pvalue 1', 'significance 1', 'pvalue 2', 'significance 2']), ignore_index=True)
Reg_Output_FAmount = Reg_Output_FAmount.append(pd.DataFrame(np.array([["Rsquared", "{:.4f}".format(fe_res_VS.rsquared), '', "{:.4f}".format(fe_res_CVS.rsquared), '']]), columns=['parameter', 'pvalue 1', 'significance 1', 'pvalue 2', 'significance 2']), ignore_index=True)
Reg_Output_FAmount= Reg_Output_FAmount.append(pd.DataFrame(np.array([["Model type", fe_res_VS.name, '', fe_res_CVS.name, '']]), columns=['parameter', 'pvalue 1', 'significance 1', 'pvalue 2', 'significance 2']), ignore_index=True)
Reg_Output_FAmount = Reg_Output_FAmount.append(pd.DataFrame(np.array([["DV", fe_res_VS.model.dependent.vars[0], '', fe_res_CVS.model.dependent.vars[0], '']]), columns=['parameter', 'pvalue 1', 'significance 1', 'pvalue 2', 'significance 2']), ignore_index=True)
Reg_Output_FAmount.fillna('', inplace=True)
resulting in a nice regression output looking like that:
parameter coefficient 1 pvalue 1 significance 1 coefficient 2 pvalue 2 significance 2
0 IV 0.0676 0.2269 0.0732 0.1835
1 IV_SE (0.0559) (0.055)
2 Control 0.3406 0.0125 ** 0.3482 0.0118 **
3 Control_SE (0.1363) 0.1383)
4 const 0.2772 0.0000 *** 0.2769 0.0000 ***
5 const_SE (0.012) (0.012)
6 observ. 99003 99003
7 Rsquared 0.12 0.14
8 Model type PanelOLS PanelOLS
9 DV FAmount FAmount
来源:https://stackoverflow.com/questions/46279576/linearmodels-panelols-regression-output-with-stars