user = {\'id\':[\'abab23\', \'abab21\', \'abab22\', \'abab25\', \'abab24\', \'abab30\', \'abab252\', \'abab15\'],
\'dob\':[\'10-10-1990\',\'1-12-1993\', \'12-12-
Try this:
activities = {'sentconn':['abab35', 'abab15', 'abab25', 'abab23','abab22', 'abab15'],
'receiveconn': ['abab24', 'abab24', 'abab21', 'abab35', 'abab252', 'abab30'],
'sentdate':['2-10-2020', '2-10-2020','4-10-2020', '5-10-2020', '10-10-2020', '11-10-2020'],
'receivedDate':['2-10-2020', '2-10-2020','4-10-2020', '5-10-2020', '10-10-2020', '11-10-2020']}
user = {'id':['abab23', 'abab21', 'abab22', 'abab25', 'abab24', 'abab30', 'abab252', 'abab15'],
'dob':['10-10-1990','1-12-1993', '12-12-2000', '2-10-1999', '2-10-1999', '2-10-1999', '2-10-1999', '2-10-1999']}
usr_df = pd.DataFrame(user)
df = pd.DataFrame(activities)
#group by the required columns to get the count.
df1 = df.groupby(['sentdate','sentconn']).agg({'sentconn':'count'})
df2 = df.groupby(['receivedDate','receiveconn']).agg({'receiveconn':'count'})
#rename the axis so that you get common columns to concat
df1 = df1.rename_axis(['date','user'])
df2 = df2.rename_axis(['date','user'])
df = pd.concat([df1, df2],axis=1)\
.fillna(0)\
.reset_index()
#filter the user id not present is user df as required.
df = df.loc[df['user'].isin(usr_df['id'])]\
.set_index(['date','user'])
print(df)
outputs:
sentconn receiveconn
date user
10-10-2020 abab22 1.0 0.0
abab252 0.0 1.0
11-10-2020 abab15 1.0 0.0
abab30 0.0 1.0
2-10-2020 abab15 1.0 0.0
abab24 0.0 2.0
4-10-2020 abab21 0.0 1.0
abab25 1.0 0.0
5-10-2020 abab23 1.0 0.0
Use:
#seelct only necessary columns
activities = activities[['sentconn','receiveconj','sentdate','receivedDate']]
#set new columns names
activities.columns = ['sent_id','receive_id','sent_date','receive_date']
#ssplit columns names by _ to MultiIndex
activities.columns = activities.columns.str.split('_', expand=True)
#reshape DataFrame and filter by is with id in inner merge
activities = (activities.stack(0)
.rename_axis([None, 'type'])
.reset_index(level=1)
.merge(user['id']))
print (activities)
type date id
0 receive 2-10-2020 abab24
1 receive 2-10-2020 abab24
2 sent 2-10-2020 abab15
3 sent 11-10-2020 abab15
4 receive 4-10-2020 abab21
5 sent 4-10-2020 abab25
6 sent 5-10-2020 abab23
7 receive 10-10-2020 abab252
8 sent 10-10-2020 abab22
9 receive 11-10-2020 abab30
#get counts by crosstab
df = pd.crosstab([activities['date'], activities['id']], activities['type'])
print (df)
type receive sent
date id
10-10-2020 abab22 0 1
abab252 1 0
11-10-2020 abab15 0 1
abab30 1 0
2-10-2020 abab15 0 1
abab24 2 0
4-10-2020 abab21 1 0
abab25 0 1
5-10-2020 abab23 0 1