I have a dataframe like this:
import pandas as pd
df = pd.DataFrame({\"c0\": list(\'ABC\'),
\"c1\": [\" \".join(list(\'ab\')), \" \".j
This is how I get the result; in R this operation is called `unnest`.
# Turn each space-separated string in c1 into a list of tokens.
# NOTE: this overwrites df's c1 column in place.
df['c1'] = df['c1'].str.split(' ')
# Unnest: spread every token list across columns, stack those columns into
# one row per token, then rebuild the frame indexed by (c0, c1).
(
    df.set_index(['c0', 'c2'])['c1']
    .apply(pd.Series)
    .stack()
    .reset_index()
    # `level_2` is the unnamed token-position level created by stack().
    # Use the `columns=` keyword: pandas >= 2.0 no longer accepts `axis`
    # as a positional argument to drop().
    .drop(columns='level_2')
    .rename(columns={0: 'c1'})
    .set_index(['c0', 'c1'])
)
Out[208]:
c2
c0 c1
A a D
b D
B d E
e E
f E
C s F
Option 1
import numpy as np, pandas as pd
# Split every c1 string on whitespace and count the tokens in each row.
tokens = df.c1.str.split()
counts = tokens.str.len()
# Repeat each row once per token. Rows are selected by position (iloc) rather
# than by label, so the result stays aligned with the flattened tokens even
# when df's index contains duplicate labels.
newdf = (
    df.iloc[np.arange(len(df)).repeat(counts)]
    .assign(c1=np.concatenate(tokens.tolist()))
    .set_index(['c0', 'c1'])
)
newdf
c2
c0 c1
A a D
b D
B d E
e E
f E
C s F
Option 2
Should be faster
import numpy as np, pandas as pd
# Split every c1 string on a single space. `np.char.split` is the public
# name for this routine; the `np.core` path is private/deprecated as of
# NumPy 2.0. The result is an object array holding one token list per row.
parts = np.char.split(df.c1.values.astype(str), ' ')
counts = [len(p) for p in parts.tolist()]
# Positional row number of the source row for every flattened token.
rows = np.arange(len(parts)).repeat(counts)
index = pd.MultiIndex.from_arrays(
    [
        df.c0.values[rows],
        np.concatenate(parts),
    ],
    names=['c0', 'c1'],
)
newdf = pd.DataFrame({'c2': df.c2.values[rows]}, index=index)
newdf
c2
c0 c1
A a D
b D
B d E
e E
f E
C s F