I am a new Python aficionado. For R users, there is one function, paste, that helps to concatenate two or more variables in a dataframe. It's very useful. For example, suppose
You can try pandas.Series.str.cat
import pandas as pd
def paste0(ss, sep=None, na_rep=None):
    """Concatenate several sequences element-wise, in analogy to R's paste0.

    Args:
        ss: Iterable of sequences (lists, ranges, Series, ...). Each one is
            wrapped in a ``pandas.Series`` and cast to text.
        sep: Separator placed between the pieces of each row. ``None`` is
            passed through to ``Series.str.cat`` unchanged.
        na_rep: Text substituted for missing values. When ``None`` (the
            default) missing values are rendered by ``astype(str)`` exactly
            as before; when given, missing entries are kept missing through
            the cast so that ``Series.str.cat`` can replace them.

    Returns:
        A ``pandas.Series`` of the joined strings; an empty Series when
        ``ss`` contains no sequences.
    """
    columns = [pd.Series(s) for s in ss]
    if not columns:
        # Nothing to paste together; avoid the IndexError on columns[0].
        return pd.Series([], dtype=object)

    texts = []
    for col in columns:
        text = col.astype(str)
        if na_rep is not None:
            # astype(str) turns missing values into ordinary text, which
            # would make na_rep a no-op; restore them as missing here.
            text = text.where(col.notna())
        texts.append(text)

    head, rest = texts[0], texts[1:]
    # ``rest`` must stay a list (even if empty): passing others=None would
    # collapse the whole Series into one single string instead.
    return head.str.cat(rest, sep=sep, na_rep=na_rep)


pasteA = paste0
Or just sep.join()
try:
    _text_type = unicode  # Python 2: the dedicated unicode text type
except NameError:
    _text_type = str  # Python 3: str already is the unicode text type


def paste0(ss, sep=None, na_rep=None, castF=_text_type):
    """Join several sequences element-wise using plain ``sep.join``.

    Args:
        ss: Iterable of sequences; rows are formed with ``zip(*ss)``, so the
            result is as long as the shortest sequence.
        sep: Separator between the pieces of each row; ``None`` means ``''``.
        na_rep: Accepted for signature parity with the pandas-based variant;
            this implementation does not use it.
        castF: Callable used to convert the separator and every element to
            text. Defaults to the unicode text type (the original author
            notes that many languages do not work well with byte ``str``).

    Returns:
        A list with one joined string per row.
    """
    if sep is None:
        sep = ''
    joiner = castF(sep)  # invariant across rows, so convert it once
    return [joiner.join(castF(s) for s in row) for row in zip(*ss)]


pasteB = paste0
# IPython session: time the pandas-based pasteA against the join-based pasteB
# on the same pair of 1000-element ranges; the recorded result follows each call.
%timeit pasteA([range(1000),range(1000,0,-1)],sep='_')
# 100 loops, best of 3: 7.11 ms per loop
%timeit pasteB([range(1000),range(1000,0,-1)],sep='_')
# 100 loops, best of 3: 2.24 ms per loop
I have used itertools to mimic R's recycling of shorter vectors:
import itertools
try:
    _text_type = unicode  # Python 2: the dedicated unicode text type
except NameError:
    _text_type = str  # Python 3: str already is the unicode text type


def paste0(ss, sep=None, na_rep=None, castF=_text_type):
    """Join sequences element-wise with recycling, in analogy to R's paste0.

    Shorter sequences are cycled until the longest one is exhausted. As in
    R's paste, a zero-length sequence is treated as a single empty string
    when at least one other sequence is non-empty.

    Args:
        ss: Iterable of sized sequences (each must support ``len``).
        sep: Separator between the pieces of each row; ``None`` means ``''``.
        na_rep: Accepted for signature parity with the pandas-based variant;
            this implementation does not use it.
        castF: Callable used to convert the separator and every element to
            text. Defaults to the unicode text type.

    Returns:
        A list of joined strings, as long as the longest input sequence
        (wrap it in ``pd.Series`` if a Series is wanted). Empty when ``ss``
        is empty or every sequence is empty.
    """
    if sep is None:
        sep = u''
    ss = list(ss)
    if not ss:
        # max() of an empty collection would raise ValueError.
        return []

    length = max(len(e) for e in ss)
    # cycle() over an empty sequence yields nothing, which would abort the
    # row construction; substitute one empty string as R does.
    cycled = [itertools.cycle(e if len(e) else [u'']) for e in ss]
    # The cycles are infinite, so the zip must be lazy: izip on Python 2,
    # the builtin zip on Python 3.
    lazy_zip = getattr(itertools, 'izip', zip)
    rows = itertools.islice(lazy_zip(*cycled), length)

    joiner = castF(sep)  # invariant across rows, so convert it once
    return [joiner.join(castF(s) for s in row) for row in rows]
patsy might be relevant (I am not an experienced user of it myself).