I have a list that contains some string items:
res = ["FAV_VENUE_CITY_NAME == 'Mumbai' & EVENT_GENRE == 'KIDS' & count_EVENT_GENRE >= 1",
"FAV_… (the rest of the question is truncated)
You can try regular expressions, as shown below. I used a list of regexes
with the search method for matching, and the ast module to unquote the
captured strings (for example, to get 'Hindi' from "'Hindi'").
Finally, I used a uniquer function to get the unique elements of each list.
import re,ast
# Rule strings to mine.  Each one is a "&"-joined list of comparisons of the
# form `FIELD == 'value'` (plus optional `count_FIELD >= n` terms).
res = [
    "FAV_VENUE_CITY_NAME == 'Mumbai' & EVENT_GENRE == 'KIDS' & count_EVENT_GENRE >= 1",
    "FAV_VENUE_CITY_NAME == 'Mumbai' & EVENT_GENRE == 'FANTASY' & count_EVENT_GENRE >= 1",
    "FAV_VENUE_CITY_NAME =='Mumbai' & EVENT_GENRE == 'FESTIVAL' & count_EVENT_GENRE >= 1",
    "FAV_VENUE_CITY_NAME == 'New Delhi' & EVENT_GENRE == 'WORKSHOP' & count_EVENT_GENRE >= 1",
    "FAV_VENUE_CITY_NAME == 'Mumbai' & EVENT_GENRE == 'EXHIBITION' & count_EVENT_GENRE >= 1",
    "FAV_VENUE_CITY_NAME == 'Bangalore' & FAV_GENRE == '|DRAMA|'",
    "FAV_VENUE_CITY_NAME = 'Mumbai' & & FAV_GENRE == '|ACTION|ADVENTURE|SCI-FI|'",
    "FAV_VENUE_CITY_NAME == 'Bangalore' & FAV_GENRE == '|COMEDY|'",
    "FAV_VENUE_CITY_NAME == 'Bangalore' & FAV_GENRE == 'DRAMA' & FAV_LANGUAGE == 'English'",
    "FAV_VENUE_CITY_NAME == 'New Delhi' & FAV_LANGUAGE == 'Hindi' & count_EVENT_LANGUAGE >= 1",
]

# One independent accumulator per extracted field; all start out empty.
FAV_VENUE_CITY_NAME, EVENT_GENRE, FAV_GENRE = [], [], []
FAV_LANGUAGE, count_on_field = [], []
# Regexes addressed by position; each captures one field's value in group 1:
#   0  FAV_VENUE_CITY_NAME  -> text up to the next "&"
#   1  EVENT_GENRE          -> text up to the next "&"
#   2  FAV_GENRE            -> text up to whitespace, "&" or end of string
#   3  FAV_LANGUAGE         -> text up to whitespace, "&" or end of string
#   4  count_<FIELD>        -> the word following "count_"
# Raw strings keep "\s" and "\w" from being read as string escapes.
pat = [
    r"""FAV_VENUE_CITY_NAME[\s==]+(.*?)&""",
    r"""EVENT_GENRE[\s==]+(.*?)&""",
    r"""FAV_GENRE[\s==]+(.*?)(?:\s|&|$)""",
    r"""FAV_LANGUAGE[\s==]+(.*?)(?:\s|&|$)""",
    r"""count_(\w+)""",
]


def matcher(st, indx, lst):
    """Append the value that ``pat[indx]`` captures in ``st`` to ``lst``.

    Args:
        st: The string to search.
        indx: Index into ``pat`` selecting which regex to apply.
        lst: List that receives the stripped first capture group.  It is
            left untouched when the regex does not match.

    Returns:
        None; ``lst`` is modified in place.
    """
    # Search the argument itself (not a global) and only once.
    found = re.search(pat[indx], st)
    if found:
        lst.append(found.group(1).strip())
def uniquer(l):
    """Return the distinct items of ``l`` as a list, unquoted when possible.

    Every distinct item is passed through ``ast.literal_eval`` so that a
    quoted token such as ``"'Hindi'"`` becomes ``'Hindi'``.  If any item is
    not a valid Python literal, the distinct items are returned unchanged
    instead.

    Args:
        l: Iterable of hashable items (strings in this script).

    Returns:
        A list of the unique items; the order is whatever the intermediate
        set yields and is therefore not guaranteed.
    """
    distinct = list(set(l))
    try:
        # Build the list eagerly so a literal_eval failure is raised here,
        # inside the try, even where map() is lazy.
        return [ast.literal_eval(item) for item in distinct]
    except (ValueError, SyntaxError, TypeError):
        # Not all items are literals (e.g. bare field names): keep them raw.
        return distinct
# Run every rule string through each pattern: the list at position i in
# `targets` is passed to matcher together with pattern index i.
targets = [FAV_VENUE_CITY_NAME, EVENT_GENRE, FAV_GENRE, FAV_LANGUAGE, count_on_field]
for s in res:
    for indx, lst in enumerate(targets):
        matcher(s, indx, lst)

# Print the de-duplicated values on one space-separated line.  Joining the
# pieces into a single string first produces the same text whether print is
# the Python 2 statement or the Python 3 function.
report = [FAV_GENRE, FAV_LANGUAGE, FAV_VENUE_CITY_NAME, EVENT_GENRE, count_on_field]
print(" ".join(str(uniquer(values)) for values in report))
Output (the order of elements within each list may vary, because the values pass through a set):
['|DRAMA|', '|COMEDY|', '|ACTION|ADVENTURE|SCI-FI|', 'DRAMA'] ['Hindi', 'English'] ['New Delhi', 'Mumbai', 'Bangalore'] ['FESTIVAL', 'WORKSHOP', 'FANTASY', 'KIDS', 'EXHIBITION'] ['EVENT_GENRE', 'EVENT_LANGUAGE']
See the live regex DEMO.