I have a python console application that contains 300+ regular expressions. The set of regular expressions is fixed for each release. When users run the app, the entire se
I had the same problem and, instead of patching Python's `re` module, I opted to create a long-running regex "service". Basic code is appended below. Please note: it is not designed to handle multiple clients in parallel, i.e. the server only becomes available to the next client once the current client has closed its connection.
server
from multiprocessing.connection import Client
from multiprocessing.connection import Listener
import re
class RegexService(object):
    """Cache of compiled regular expressions that answers match requests.

    Every distinct regex string is compiled once (with ``re.IGNORECASE``) and
    kept in ``patternsByRegex`` for the lifetime of the instance, so repeated
    requests for the same regex reuse the compiled pattern.
    """

    # Class-level placeholder kept for backward compatibility; each instance
    # gets its own dict in __init__.
    patternsByRegex = None

    def __init__(self):
        # Maps regex source string -> compiled pattern object.
        self.patternsByRegex = {}

    def processMessage(self, message):
        """Match ``message['text']`` against ``message['regex']``.

        Args:
            message: dict-like object with a ``'regex'`` entry and an optional
                ``'text'`` entry.

        Returns:
            A dict that always has an ``'error'`` key. On a successful request
            ``'error'`` is ``None`` and ``'matchgroups'`` holds
            ``match.groups()``, or ``None`` when the pattern did not match at
            the start of the text. On a bad request ``'error'`` holds a
            message and ``'matchgroups'`` is absent.
        """
        result = {"error": None}

        regex = message.get('regex')
        if regex is None:
            result["error"] = "no regex in message - something is wrong with your client"
            return result

        text = message.get('text')

        pattern = self.patternsByRegex.get(regex)
        if pattern is None:
            # Single-argument print() behaves the same on Python 2 and 3.
            print("compiling previously unseen regex: %s" % (regex))
            try:
                pattern = re.compile(regex, re.IGNORECASE)
            except (re.error, TypeError) as exc:
                # Report a bad pattern to the client instead of letting the
                # exception escape and take the whole service down.
                result["error"] = "invalid regex: %s" % (exc,)
                return result
            self.patternsByRegex[regex] = pattern

        # The regex is compiled and cached even when no text was sent, so a
        # client can pre-warm the cache with a text-less message.
        if text is None:
            # Message text kept as-is for existing clients.
            result["error"] = "no match"
            return result

        try:
            match = pattern.match(text)
        except TypeError as exc:
            # Text of the wrong type (e.g. a number) cannot be matched.
            result["error"] = "invalid text: %s" % (exc,)
            return result

        result["matchgroups"] = None if match is None else match.groups()
        return result
# Work requests arrive on workAddress; results are pushed back to the client
# over a second connection that the server opens to resultAddress.
workAddress = ('localhost', 6000)
resultAddress = ('localhost', 6001)

# Bytes literal: identical to the plain str on Python 2, required on Python 3.
listener = Listener(workAddress, authkey=b'secret password')
service = RegexService()
patterns = {}  # NOTE(review): unused; kept so the module's globals are unchanged.

try:
    # Serve one client at a time, forever.
    while True:
        connection = listener.accept()
        try:
            resultClient = Client(resultAddress, authkey=b'secret password')
            try:
                while True:
                    try:
                        message = connection.recv()
                        resultClient.send(service.processMessage(message))
                    except EOFError:
                        # Client closed its end: go back to accepting.
                        break
            finally:
                resultClient.close()
        finally:
            connection.close()
finally:
    # Reached when the loop is interrupted (e.g. Ctrl-C) or an error escapes,
    # so the listening socket is always released.
    listener.close()
testclient
from multiprocessing.connection import Client
from multiprocessing.connection import Listener
workAddress = ('localhost', 6000)
resultAddress = ('localhost', 6001)

# Bind the result listener BEFORE connecting to the service: the server
# connects back to resultAddress as soon as it has accepted the work
# connection, so the listening socket must already exist at that point.
# Bytes literal: identical to the plain str on Python 2, required on Python 3.
resultListener = Listener(resultAddress, authkey=b'secret password')
regexClient = Client(workAddress, authkey=b'secret password')

# Accepted lazily by getResult() on the first result read.
resultConnection = None
def getResult():
    """Return the next result object sent by the service.

    The result connection is accepted from ``resultListener`` on the first
    call and reused on every later call.
    """
    global resultConnection
    if resultConnection is None:
        resultConnection = resultListener.accept()
    return resultConnection.recv()
# Send three sample requests and print each reply. Single-argument print()
# behaves the same on Python 2 and 3.
try:
    # No "text" entry: the service answers with an error.
    regexClient.send({
        "regex": r'.*'
    })
    print(str(getResult()))

    # Pattern without groups: a match yields an empty groups tuple.
    regexClient.send({
        "regex": r'.*',
        "text": "blub"
    })
    print(str(getResult()))

    # Pattern with one group: the matched text comes back in the tuple.
    regexClient.send({
        "regex": r'(.*)',
        "text": "blub"
    })
    print(str(getResult()))
finally:
    # Release every socket even if a request failed part-way through.
    if resultConnection is not None:
        resultConnection.close()
    resultListener.close()
    regexClient.close()
Output of the test client, run twice:
$ python ./regexTest.py
{'error': 'no match'}
{'matchgroups': (), 'error': None}
{'matchgroups': ('blub',), 'error': None}
$ python ./regexTest.py
{'error': 'no match'}
{'matchgroups': (), 'error': None}
{'matchgroups': ('blub',), 'error': None}
output of service process during both test runs
$ python ./regexService.py
compiling previously unseen regex: .*
compiling previously unseen regex: (.*)