filter out some items from a list and store in different arrays in python

后端 未结 1 694
迷失自我
迷失自我 2021-01-26 12:55

I have a list that contains some string items:

res = [\"FAV_VENUE_CITY_NAME == \'Mumbai\' & EVENT_GENRE == \'KIDS\' & count_EVENT_GENRE >= 1\",
\"FAV_         


        
1条回答
  •  野趣味
    野趣味 (楼主)
    2021-01-26 13:44

    You can try regex as shown below. I used a list of regex patterns with the search method for matching, and the ast module for unquoting the matched strings (for example, getting 'Hindi' from "'Hindi'"). After all the matches were collected, I used a uniquer function to get the unique elements of each list.

    import re,ast
    
    # Input rules: each string is an '&'-joined sequence of "FIELD == 'value'"
    # conditions, optionally followed by a "count_<FIELD> >= n" condition.
    res = ["FAV_VENUE_CITY_NAME == 'Mumbai' & EVENT_GENRE == 'KIDS' & count_EVENT_GENRE >= 1",
    "FAV_VENUE_CITY_NAME == 'Mumbai' & EVENT_GENRE == 'FANTASY' & count_EVENT_GENRE >= 1",
    "FAV_VENUE_CITY_NAME =='Mumbai' & EVENT_GENRE == 'FESTIVAL' & count_EVENT_GENRE >= 1",
    "FAV_VENUE_CITY_NAME == 'New Delhi' & EVENT_GENRE == 'WORKSHOP' & count_EVENT_GENRE >= 1",
    "FAV_VENUE_CITY_NAME == 'Mumbai' & EVENT_GENRE == 'EXHIBITION' & count_EVENT_GENRE >= 1",
    "FAV_VENUE_CITY_NAME == 'Bangalore' & FAV_GENRE == '|DRAMA|'",
    "FAV_VENUE_CITY_NAME = 'Mumbai' &  & FAV_GENRE == '|ACTION|ADVENTURE|SCI-FI|'",
    "FAV_VENUE_CITY_NAME == 'Bangalore' & FAV_GENRE == '|COMEDY|'",
    "FAV_VENUE_CITY_NAME == 'Bangalore' & FAV_GENRE == 'DRAMA' & FAV_LANGUAGE == 'English'",
    "FAV_VENUE_CITY_NAME == 'New Delhi' & FAV_LANGUAGE == 'Hindi' & count_EVENT_LANGUAGE >= 1"]

    # One accumulator per extracted field; the position of each field's
    # pattern in `pat` below follows this same order.
    FAV_VENUE_CITY_NAME = []
    EVENT_GENRE = []
    FAV_GENRE = []
    FAV_LANGUAGE = []
    count_on_field = []

    # Raw strings keep the regex escapes (\s, \w) away from Python's own
    # string-escape processing. Group 1 of each pattern is the value to keep;
    # for the first four it still carries its surrounding single quotes.
    pat = [
        # City and event genre: value runs up to the next '&' separator.
        r"FAV_VENUE_CITY_NAME[\s=]+(.*?)&",
        r"EVENT_GENRE[\s=]+(.*?)&",
        # Favourite genre and language may be the last condition, so the value
        # ends at whitespace, '&' or the end of the string.
        r"FAV_GENRE[\s=]+(.*?)(?:\s|&|$)",
        r"FAV_LANGUAGE[\s=]+(.*?)(?:\s|&|$)",
        # Name of the field that a "count_<FIELD>" condition refers to.
        r"count_(\w+)",
    ]
    
    def matcher(st,indx,lst):
        """Append the value that ``pat[indx]`` captures from ``st`` to ``lst``.

        st   -- the string to search.
        indx -- index into the module-level ``pat`` list of regex patterns.
        lst  -- list that receives the captured value; mutated in place.

        The first capture group of the first match is stripped of surrounding
        whitespace and appended. Nothing is appended when the pattern does
        not match. Returns None.
        """
        # Search the argument itself, once, rather than a same-named global.
        found = re.search(pat[indx], st)
        if found:
            lst.append(found.group(1).strip())
    def uniquer(l):
        """Return the distinct items of ``l`` as a list, unquoted when possible.

        Duplicates are removed by passing the items through a set, so the
        order of the result is arbitrary. Each distinct item is then parsed
        with ``ast.literal_eval`` (turning "'Hindi'" into 'Hindi'). If any
        item is not a valid Python literal, the distinct items are returned
        unchanged instead.
        """
        distinct = list(set(l))
        try:
            # Build the list eagerly so a bad item raises here, inside the
            # try block, instead of later when a lazy map is consumed.
            return [ast.literal_eval(item) for item in distinct]
        except (ValueError, SyntaxError, TypeError):
            # At least one item is not a literal (e.g. a bare field name).
            return distinct
    
    # Run every field pattern against every rule string; each call appends
    # the captured value (if any) to the list for that field. The second
    # argument is the pattern's index in `pat`.
    for s in res:
        matcher(s,0,FAV_VENUE_CITY_NAME)
        matcher(s,1,EVENT_GENRE)
        matcher(s,2,FAV_GENRE)
        matcher(s,3,FAV_LANGUAGE)
        matcher(s,4,count_on_field)

    # Print the de-duplicated values of each field on one line, separated by
    # single spaces. A single pre-joined string is passed to print so the
    # output text is the same under Python 2 and Python 3.
    print(" ".join(str(values) for values in (
        uniquer(FAV_GENRE),
        uniquer(FAV_LANGUAGE),
        uniquer(FAV_VENUE_CITY_NAME),
        uniquer(EVENT_GENRE),
        uniquer(count_on_field),
    )))
    

    Output-

    ['|DRAMA|', '|COMEDY|', '|ACTION|ADVENTURE|SCI-FI|', 'DRAMA'] ['Hindi', 'English'] ['New Delhi', 'Mumbai', 'Bangalore'] ['FESTIVAL', 'WORKSHOP', 'FANTASY', 'KIDS', 'EXHIBITION'] ['EVENT_GENRE', 'EVENT_LANGUAGE']
    

    See the live regex DEMO.

    0 讨论(0)
提交回复
热议问题