I currently have a file that contains a list that looks like this:
example = [\'Mary had a little lamb\' ,
\'Jack went up the hill\' ,
\'Ji
It's hard for me to tell what you are trying to do.
How about this:
# Words to drop from every sentence (exact, case-sensitive match).
exclude = set(['Mary', 'Jack', 'Jill', 'i', 'it'])
mod_example = []
for sentence in example:
    # Build a filtered list instead of calling words.remove() while looping
    # over words: removing during iteration skips the element that follows
    # each removal, so two excluded words in a row would leave the second
    # one behind.
    words = [word for word in sentence.split() if word not in exclude]
    # Wrap each remaining word in single quotes, separated by spaces.
    mod_example.append('\'' + '\' \''.join(words) + '\'')
# Single-argument call form prints the same on Python 2 and Python 3.
print(mod_example)
Which outputs:
["'had' 'a' 'little' 'lamb'", "'went' 'up' 'the' 'hill'", "'followed' 'suit'",
"'woke' 'up' 'suddenly'", "'was' 'a' 'really' 'bad' 'dream...'"]
>>>
Edit: Another suggestion based on further info given by the OP
# Sample address records: the first whitespace-separated token is used as
# the area label; the same label may appear again later in the line.
example = ['Area1 Area1 street one, 4454 hikoland',
           'Area2 street 2, 52432 hikoland, area2',
           'Area3 ave three, 0534 hikoland']
mod_example = []
for sentence in example:
    words = sentence.split()
    if not words:
        # A blank entry has no label; words[0] would raise IndexError.
        continue
    # First token is the label, everything after it is the address part.
    col1 = words[0]
    col2 = words[1:]
    # Drop one repeated occurrence of the label from the address, trying the
    # exact spelling first and the all-lowercase spelling second.
    if col1 in col2:
        col2.remove(col1)
    elif col1.lower() in col2:
        col2.remove(col1.lower())
    mod_example.append(col1 + ': ' + ' '.join(col2))
Outputs
>>> print mod_example
['Area1: street one, 4454 hikoland', 'Area2: street 2, 52432 hikoland,',
'Area3: ave three, 0534 hikoland']
>>>