Is there a way to really pickle compiled regular expressions in python?

后端 未结 7 1656
一整个雨季
一整个雨季 2020-12-13 15:00

I have a python console application that contains 300+ regular expressions. The set of regular expressions is fixed for each release. When users run the app, the entire se

相关标签:
7条回答
  • 2020-12-13 15:37

    I had the same problem, and instead of patching Python's re module I opted to create a long-running regex "service". The basic code is appended below. Please note: it is not designed to handle multiple clients in parallel, i.e. the server only becomes available to the next client once the current client has closed its connection.

    server

    from multiprocessing.connection import Client
    from multiprocessing.connection import Listener
    import re
    
    class RegexService(object):
        """Compile regular expressions once and reuse them across requests.

        Compiled patterns are cached in ``patternsByRegex``, keyed by the regex
        source string, so each distinct regex is compiled at most once for the
        lifetime of the service object.
        """

        # Class-level placeholder; every instance gets its own dict in __init__.
        patternsByRegex = None

        def __init__(self):
            self.patternsByRegex = {}

        def processMessage(self, message):
            """Match ``message['text']`` against ``message['regex']``.

            Args:
                message: mapping with a ``'regex'`` entry (pattern source) and
                    an optional ``'text'`` entry (string to match).

            Returns:
                A dict that always has an ``"error"`` key (``None`` on success,
                otherwise a description). When a regex and a text were both
                supplied it also has ``"matchgroups"``: the tuple from
                ``match.groups()``, or ``None`` if the pattern did not match
                at the start of the text.
            """
            regex = message.get('regex')
            result = {"error": None}
            if regex is None:
                result["error"] = "no regex in message - something is wrong with your client"
                return result
            text = message.get('text')
            pattern = self.patternsByRegex.get(regex)
            if pattern is None:
                # A single parenthesised argument prints the same on Python 2 and 3.
                print("compiling previously unseen regex: %s" % (regex,))
                try:
                    pattern = re.compile(regex, re.IGNORECASE)
                except re.error as exc:
                    # Report a bad pattern to the client instead of letting the
                    # exception propagate out of the service.
                    result["error"] = "invalid regex: %s" % (exc,)
                    return result
                self.patternsByRegex[regex] = pattern
            if text is None:
                result["error"] = "no match"
                return result
            match = pattern.match(text)
            result["matchgroups"] = match.groups() if match is not None else None
            return result
    
    workAddress = ('localhost', 6000)
    resultAddress = ('localhost', 6001)
    # b'' literal: a plain str on Python 2 (same value as before) and the
    # bytes object that multiprocessing.connection requires on Python 3.
    authKey = b'secret password'
    listener = Listener(workAddress, authkey=authKey)
    service = RegexService()
    try:
        while True:
            # Serve exactly one client at a time: accept it, connect back to
            # its result listener, and process messages until it hangs up.
            connection = listener.accept()
            try:
                resultClient = Client(resultAddress, authkey=authKey)
                try:
                    while True:
                        try:
                            message = connection.recv()
                        except EOFError:
                            # Client closed its end; go back to accept().
                            break
                        resultClient.send(service.processMessage(message))
                finally:
                    resultClient.close()
            finally:
                connection.close()
    finally:
        # Reached when the loop is interrupted (e.g. Ctrl-C) or fails, so the
        # listening socket is always released.
        listener.close()
    

    testclient

    from multiprocessing.connection import Client
    from multiprocessing.connection import Listener
    
    
    workAddress = ('localhost', 6000)
    resultAddress = ('localhost', 6001)
    # Open the result listener BEFORE connecting to the service: the service
    # connects back to resultAddress as soon as it has accepted this client,
    # so the listener must already exist or that connection can be refused.
    # b'' literal: a plain str on Python 2, the required bytes on Python 3.
    resultListener = Listener(resultAddress, authkey=b'secret password')
    regexClient = Client(workAddress, authkey=b'secret password')
    # Accepted lazily by getResult() on first use.
    resultConnection = None
    
    def getResult():
        """Return the next result object sent back by the regex service.

        On the first call this accepts the service's connection on
        ``resultListener`` and stores it in the module-level
        ``resultConnection``; later calls reuse that connection. Blocks until
        a result is received.
        """
        global resultConnection
        if resultConnection is None:
            resultConnection = resultListener.accept()
        return resultConnection.recv()
    
    # Send three sample requests and print each reply. print(...) with one
    # argument produces the same output on Python 2 and Python 3.
    try:
        # No "text" entry: the service answers with an error.
        regexClient.send({
            "regex": r'.*'
        })
        print(str(getResult()))
        # Pattern without groups: matchgroups is an empty tuple.
        regexClient.send({
            "regex": r'.*',
            "text": "blub"
        })
        print(str(getResult()))
        # Pattern with one group: matchgroups holds the captured text.
        regexClient.send({
            "regex": r'(.*)',
            "text": "blub"
        })
        print(str(getResult()))
    finally:
        # Always release the sockets, even if a send/receive failed.
        # resultConnection is still None if no result was ever received.
        if resultConnection is not None:
            resultConnection.close()
        resultListener.close()
        regexClient.close()
    

    output of test client run 2 times

    $ python ./regexTest.py 
    {'error': 'no match'}
    {'matchgroups': (), 'error': None}
    {'matchgroups': ('blub',), 'error': None}
    $ python ./regexTest.py 
    {'error': 'no match'}
    {'matchgroups': (), 'error': None}
    {'matchgroups': ('blub',), 'error': None}
    

    output of service process during both test runs

    $ python ./regexService.py
    compiling previously unseen regex: .*
    compiling previously unseen regex: (.*)
    
    0 讨论(0)
提交回复
热议问题