import pandas as pd
import glob
# Directory that holds the zipped CSV log archives.
data_dir = './data/'

# Every .zip archive directly under data_dir, in lexicographic path order.
file_paths = sorted(glob.glob(data_dir + '*.zip'))
print(len(file_paths))

# Load only the last four archives of the sorted listing. Column names are
# passed explicitly, so pandas treats the first row of each file as data
# rather than as a header.
df_list = []
for path in file_paths[-4:]:
    df_list.append(
        pd.read_csv(
            path,
            names=['index', 'player_id', 'giftID', 'state', 'offer_time'],
            sep=',',
            compression='zip',
            quotechar='"',
        )
    )

# Stack the per-file frames into a single log, then drop the list so the
# individual frames are no longer referenced by name.
log = pd.concat(df_list)
del df_list

# Keep only the rows whose giftID occurs in exchange['charge_id'].
# NOTE(review): `exchange` is not defined in this section; it is presumably a
# DataFrame (or mapping of columns) created elsewhere -- confirm it is in
# scope before this line runs.
log = log[log['giftID'].isin(exchange['charge_id'])]
print(log.shape)
import json
def parse_log(log):
    """Expand the JSON ``state`` column of *log* into one column per key.

    Args:
        log: DataFrame with the columns ``index``, ``player_id``, ``giftID``,
            ``offer_time`` and ``state``, where every ``state`` value is a
            JSON-encoded object.

    Returns:
        A new DataFrame holding ``index``, ``player_id``, ``giftID`` and
        ``offer_time`` from *log*, plus one column for each key found in the
        first row's ``state`` object. *log* itself is not modified. For an
        empty *log* only the four base columns are returned.

    Raises:
        KeyError: if a later row's ``state`` lacks a key that the first
            row's ``state`` has.
        json.JSONDecodeError: if a ``state`` value is not valid JSON.
    """
    # Take an explicit copy: assigning new columns onto a bare column
    # selection is what triggers pandas' SettingWithCopyWarning.
    feature = log[['index', 'player_id', 'giftID', 'offer_time']].copy()

    # Decode every state string exactly once.
    records = [json.loads(line) for line in log['state']]
    if not records:
        # Nothing to expand; also avoids indexing into an empty column.
        return feature

    # The first row decides which keys become columns (and in what order).
    for key in records[0]:
        feature[key] = [record[key] for record in records]
    return feature
# Source: https://blog.csdn.net/luoganttcc/article/details/100669622