If we have a list of strings in Python and want to split it into sublists
at every occurrence of some special separator string,
how should we do it?
For instance
I'm not sure whether this is the most "pythonic" way of solving it.
def split_seq(seq, sep):
    """Yield consecutive slices of *seq* that are delimited by *sep*.

    *seq* must support ``len()``, slicing and ``index(value, start)``
    (lists, tuples and strings all do). Each yielded piece is a slice of
    *seq*, so it has the same type as *seq*.

    Edge cases:
      * an empty *seq* yields nothing;
      * a leading separator, or two adjacent separators, yield an empty slice;
      * a trailing separator does NOT yield a final empty slice.
    """
    start = 0
    while start < len(seq):
        try:
            # Search in place from ``start``; slicing first
            # (``seq[start:].index(sep)``) would copy the whole tail on
            # every pass and make the loop quadratic.
            stop = seq.index(sep, start)
        except ValueError:
            # No separator left: everything that remains is the last piece.
            yield seq[start:]
            break
        yield seq[start:stop]
        # Resume just past the separator that was found.
        start = stop + 1
# Sample input: the empty strings mark where one sublist ends and the next begins.
ll = ["data", "more data", "", "data 2", "more data 2", "danger", "", "date3", "lll"]
# Drain the generator into a list of sublists.
p = list(split_seq(ll, ""))
itertools.groupby is one approach (as it often is):
>>> l = ["data","more data","","data 2","more data 2","danger","","date3","lll"]
>>> from itertools import groupby
>>> groupby(l, lambda x: x == "")
<itertools.groupby object at 0x9ce06bc>
>>> [list(group) for k, group in groupby(l, lambda x: x == "") if not k]
[['data', 'more data'], ['data 2', 'more data 2', 'danger'], ['date3', 'lll']]
We can even cheat a little in this particular case, because the separator is the empty string, which is falsy:
>>> [list(group) for k, group in groupby(l, bool) if k]
[['data', 'more data'], ['data 2', 'more data 2', 'danger'], ['date3', 'lll']]
One possible implementation using itertools
>>> l
['data', 'more data', '', 'data 2', 'more data 2', 'danger', '', 'date3', 'lll']
>>> it_l = iter(l)
>>> from itertools import takewhile, dropwhile
>>> [[e] + list(takewhile(lambda e: e != "", it_l)) for e in it_l if e != ""]
[['data', 'more data'], ['data 2', 'more data 2', 'danger'], ['date3', 'lll']]
Note:
This is roughly as fast as using groupby (about 10% slower in the timings below):
>>> stmt_dsm = """
[list(group) for k, group in groupby(l, lambda x: x == "") if not k]
"""
>>> stmt_ab = """
it_l = iter(l)
[[e] + list(takewhile(lambda e: e != "", it_l)) for e in it_l if e != ""]
"""
>>> t_ab = timeit.Timer(stmt = stmt_ab, setup = "from __main__ import l, dropwhile, takewhile")
>>> t_dsm = timeit.Timer(stmt = stmt_dsm, setup = "from __main__ import l, groupby")
>>> t_ab.timeit(100000)
1.6863486541265047
>>> t_dsm.timeit(100000)
1.5298066765462863
>>> t_ab.timeit(100000)
1.735611326163962
>>>
lst = ["data", "more data", "", "data 2", "more data 2", "danger", "", "date3", "lll"]
# Join everything with commas: each empty-string separator then shows up as
# two adjacent commas (",,"), which is where the joined text is cut.
# NOTE: this trick only works while no element itself contains a comma and
# the list neither starts nor ends with a separator.
join_list = ",".join(lst)
split_list = join_list.split(",,")
# Split each chunk on the same comma used for joining. A bare ``split()``
# would split on whitespace and tear items such as "more data" apart.
result = [chunk.split(",") for chunk in split_list]
# result == [['data', 'more data'], ['data 2', 'more data 2', 'danger'], ['date3', 'lll']]
Here's one idea. :)
def spec_split(seq, sep):
    """Split *seq* into a list of sublists, cutting at every item equal to *sep*.

    The separator items themselves are dropped. Every separator closes the
    current sublist and opens a new one, so a leading, trailing or repeated
    separator produces an empty sublist, and an empty *seq* returns ``[[]]``.

    Items are compared with ``==`` and never joined into one string, so the
    result does not depend on any "magic" join marker being absent from the
    data, and items do not have to be strings.
    """
    groups = [[]]
    for item in seq:
        if item == sep:
            # Separator: start collecting into a fresh sublist.
            groups.append([])
        else:
            groups[-1].append(item)
    return groups
reduce comes to mind:
def split(iterable, where):
    """Fold *iterable* into a list of sublists, cutting at items equal to *where*.

    The separator items are dropped. Each separator opens a new sublist, so
    leading, trailing or adjacent separators produce empty sublists, and an
    empty *iterable* returns ``[[]]``.
    """
    # ``reduce`` is only a builtin on Python 2; importing it from functools
    # works on Python 2.6+ and on Python 3.
    from functools import reduce

    def splitter(acc, item):
        # ``acc`` is the list of sublists built so far; its last entry is
        # the sublist currently being filled.
        if item == where:
            acc.append([])
        else:
            acc[-1].append(item)
        return acc

    # A fresh ``[[]]`` is created on every call, so mutating the accumulator
    # in place is safe.
    return reduce(splitter, iterable, [[]])
# Sample input: the empty strings are the separators.
data = ["data", "more data", "", "data 2", "more data 2", "danger", "", "date3", "lll"]
# Parenthesised single-argument form: valid as a print statement on Python 2
# and as a function call on Python 3, with identical output.
print(split(data, ''))
Result:
[['data', 'more data'], ['data 2', 'more data 2', 'danger'], ['date3', 'lll']]