I\'m trying to figure out if there\'s a reasonably efficient way to perform a lookup in a dictionary (or a hash, or a map, or whatever your favorite language calls it) where the
@rptb1 you don't have to avoid capturing groups, because you can use re.groups to count them. Like this:
# Regular expression map
# Abuses match.lastindex to figure out which key was matched
# (i.e. to emulate extracting the terminal state of the DFA of the regexp engine)
# Mostly for amusement.
# Richard Brooksby, Ravenbrook Limited, 2013-06-01
import re
class ReMap(object):
def __init__(self, items):
if not items:
items = [(r'epsilon^', None)] # Match nothing
self.re = re.compile('|'.join('('+k+')' for (k,v) in items))
self.lookup = {}
index = 1
for key, value in items:
self.lookup[index] = value
index += re.compile(key).groups + 1
def __getitem__(self, key):
m = self.re.match(key)
if m:
return self.lookup[m.lastindex]
raise KeyError(key)
def test():
remap = ReMap([(r'foo.', 12),
(r'.*([0-9]+)', 99),
(r'FileN.*', 35),
])
print remap['food']
print remap['foot in my mouth']
print remap['FileNotFoundException: file.x does not exist']
print remap['there were 99 trombones']
print remap['food costs $18']
print remap['bar']
if __name__ == '__main__':
test()
Sadly very few RE engines actually compile the regexps down to machine code, although it's not especially hard to do. I suspect there's an order of magnitude performance improvement waiting for someone to make a really good RE JIT library.