Case-insensitive Lua pattern-matching

后端 未结 2 1944
走了就别回头了
走了就别回头了 2021-01-04 08:08

I'm writing a grep utility in Lua for our mobile devices running Windows CE 6/7, but I've run into some issues implementing case-insensitive match patterns. The obvious so…

相关标签:
2条回答
  • 2021-01-04 08:50

    Try something like this:

    -- Return the index of the ']' that closes the bracket set opened at `open`,
    -- or #pattern + 1 when the set is never closed. Mirrors Lua's own matcher:
    -- a ']' directly after '[' or '[^' is a literal, and a '%x' escape is
    -- skipped as one unit.
    local function set_close_index(pattern, open)
      local len = #pattern
      local pos = open + 1
      if pattern:sub(pos, pos) == "^" then
        pos = pos + 1
      end
      repeat
        if pos > len then
          return len + 1
        end
        local ch = pattern:sub(pos, pos)
        pos = pos + 1
        if ch == "%" and pos <= len then
          pos = pos + 1
        end
      until pattern:sub(pos, pos) == "]"
      return pos
    end

    -- Return the range `lo-hi` followed by the opposite-case copy of the ASCII
    -- letters it covers, e.g. "a-c" -> "a-cA-C". Non-letter ranges are returned
    -- unchanged.
    local function fold_range(lo, hi)
      local folded = lo .. "-" .. hi
      local first, last = lo:byte(), hi:byte()
      local from, to = math.max(first, 97), math.min(last, 122) -- 'a'..'z'
      if from <= to then
        folded = folded .. string.char(from - 32) .. "-" .. string.char(to - 32)
      end
      from, to = math.max(first, 65), math.min(last, 90) -- 'A'..'Z'
      if from <= to then
        folded = folded .. string.char(from + 32) .. "-" .. string.char(to + 32)
      end
      return folded
    end

    -- Rebuild the set spanning pattern[open..close] so that every letter and
    -- every letter range also covers the other case, without nesting brackets.
    local function fold_set(pattern, open, close)
      local parts = { "[" }
      local pos = open + 1
      if pattern:sub(pos, pos) == "^" then
        -- keep the complement marker out of range detection
        parts[#parts + 1] = "^"
        pos = pos + 1
      end
      while pos < close do
        local ch = pattern:sub(pos, pos)
        if ch == "%" then
          -- escaped class or literal: copy both characters untouched
          parts[#parts + 1] = pattern:sub(pos, pos + 1)
          pos = pos + 2
        elseif pattern:sub(pos + 1, pos + 1) == "-" and pos + 2 < close then
          -- 'x-y' is a range only when 'y' is not the closing bracket
          parts[#parts + 1] = fold_range(ch, pattern:sub(pos + 2, pos + 2))
          pos = pos + 3
        else
          if ch:match("%a") then
            ch = ch:lower() .. ch:upper()
          end
          parts[#parts + 1] = ch
          pos = pos + 1
        end
      end
      -- closing ']' (empty string when the set was never closed)
      parts[#parts + 1] = pattern:sub(close, close)
      return table.concat(parts)
    end

    -- Convert a Lua pattern into one that matches regardless of letter case.
    --
    -- Letters outside bracket sets become two-letter classes ("x" -> "[xX]").
    -- Letters and letter ranges inside bracket sets are extended in place
    -- ("[ab]" -> "[aAbB]", "[a-c]" -> "[a-cA-C]"). '%'-escaped items, including
    -- the delimiters of '%bxy', are copied unchanged.
    --
    -- @param pattern  the Lua pattern string to convert
    -- @return         the case-insensitive pattern string
    function case_insensitive_pattern(pattern)
      local parts = {}
      local len = #pattern
      local pos = 1

      while pos <= len do
        local ch = pattern:sub(pos, pos)

        if ch == "%" then
          -- '%bxy' spans four characters, every other escape spans two
          local width = (pattern:sub(pos + 1, pos + 1) == "b") and 4 or 2
          parts[#parts + 1] = pattern:sub(pos, pos + width - 1)
          pos = pos + width
        elseif ch == "[" then
          local close = set_close_index(pattern, pos)
          parts[#parts + 1] = fold_set(pattern, pos, close)
          pos = close + 1
        else
          if ch:match("%a") then
            -- a bare letter: replace it with a class holding both cases
            ch = string.format("[%s%s]", ch:lower(), ch:upper())
          end
          parts[#parts + 1] = ch
          pos = pos + 1
        end
      end

      return table.concat(parts)
    end
    
    -- Demo: convert a sample pattern and print the result.
    local sample = "xyz = %d+ or %% end"
    print(case_insensitive_pattern(sample))
    

    which prints:

    [xX][yY][zZ] = %d+ [oO][rR] %% [eE][nN][dD]
    0 讨论(0)
  • 2021-01-04 08:51

    Lua 5.1, LPeg v0.12

    do
        -- NOTE(review): `re` is used as a global here; presumably LPeg's `re`
        -- module has already been loaded by the surrounding program.

        -- Replace each letter with a class holding both cases: "a" -> "[aA]".
        local function wrap_letters(text)
            return (text:gsub('%a', function(letter)
                return '[' .. letter:lower() .. letter:upper() .. ']'
            end))
        end

        -- Replace each letter with its lower/upper pair, for use inside an
        -- existing bracket set: "a" -> "aA".
        local function pair_letters(text)
            return (text:gsub('%a', function(letter)
                return letter:lower() .. letter:upper()
            end))
        end

        -- Grammar used when sets are left alone: '%b' items, escapes and
        -- bracket sets are captured verbatim; everything else goes through
        -- `cases`.
        local outside_only = re.compile([[
            pattern  <- ( {b} / {escaped} / brackets / other)+
            b        <- "%b" . .
            escaped  <- "%" .
            brackets <- { "[" ([^]%]+ / escaped)* "]" }
            other    <- [^[%]+ -> cases
        ]], {
            cases = wrap_letters,
        })

        -- Grammar used when sets are folded too: unescaped runs inside a set
        -- additionally go through `bcases`.
        local with_sets = re.compile([[
            pattern  <- ( {b} / {escaped} / brackets / other)+
            b        <- "%b" . .
            escaped  <- "%" .
            brackets <- {: {"["} ({escaped} / bcases)* {"]"} :}
            bcases   <- [^]%]+ -> bcases
            other    <- [^[%]+ -> cases
        ]], {
            cases = wrap_letters,
            bcases = pair_letters,
        })

        -- Build a case-insensitive version of a Lua pattern.
        -- @param pattern   the Lua pattern string
        -- @param brackets  when truthy, letters inside bracket sets are
        --                  expanded as well
        -- @return          the captures of the grammar match, concatenated
        function iPattern(pattern, brackets)
            -- run the pattern once so string.find can reject it
            ('sanity check'):find(pattern)
            local grammar = outside_only
            if brackets then
                grammar = with_sets
            end
            local pieces = { re.match(pattern, grammar) }
            return table.concat(pieces)
        end
    end
    
    -- Demo input mixing a bracket set, escapes and a '%b' item.
    local sample = '[ab%c%]d%%]+ o%%r %bnm'
    -- Sets untouched; output given in the original answer:
    --   [ab%c%]d%%]+ [oO]%%[rR] %bnm
    print(iPattern(sample))
    -- Sets folded too; output given in the original answer:
    --   [aAbB%c%]dD%%]+ [oO]%%[rR] %bnm
    print(iPattern(sample, true))
    -- Matching with the folded pattern; output given in the original answer:
    --   %D]% O%r n---m
    local subject = 'qwe [%D]% O%r n---m asd'
    print(subject:match(iPattern(sample, true)))
    

    Pure Lua version:

    Every character of the string has to be analyzed to convert it into a correct pattern, because Lua patterns have no alternation like the one in regexps (abc|something).

    -- Locate the ']' closing the bracket set that opens at index `first`.
    -- Returns #pattern + 1 when the set is never closed. Follows the rules of
    -- Lua's matcher: ']' right after '[' or '[^' is a literal member, and a
    -- '%x' escape is skipped as one unit.
    local function closing_bracket(pattern, first)
        local n = #pattern
        local i = first + 1
        if pattern:sub(i, i) == '^' then i = i + 1 end
        repeat
            if i > n then return n + 1 end          -- unclosed set
            local c = pattern:sub(i, i)
            i = i + 1
            if c == '%' and i <= n then i = i + 1 end
        until pattern:sub(i, i) == ']'
        return i
    end

    -- Extend the range `lo-hi` with the opposite-case copy of the ASCII
    -- letters it covers ("a-c" -> "a-cA-C"); other ranges come back unchanged.
    local function both_case_range(lo, hi)
        local text = lo .. '-' .. hi
        local a, b = lo:byte(), hi:byte()
        local s, e = math.max(a, 97), math.min(b, 122)      -- 'a'..'z' part
        if s <= e then
            text = text .. string.char(s - 32) .. '-' .. string.char(e - 32)
        end
        s, e = math.max(a, 65), math.min(b, 90)             -- 'A'..'Z' part
        if s <= e then
            text = text .. string.char(s + 32) .. '-' .. string.char(e + 32)
        end
        return text
    end

    -- Rewrite the set pattern[first..last] so letters and letter ranges match
    -- both cases. `last` is the index returned by closing_bracket.
    local function both_case_set(pattern, first, last)
        local tmp = { '[' }
        local i = first + 1
        if pattern:sub(i, i) == '^' then        -- complement marker, not a member
            tmp[#tmp+1] = '^'
            i = i + 1
        end
        while i < last do
            local char = pattern:sub(i, i)
            if char == '%' then                 -- no '%bxy' inside brackets
                tmp[#tmp+1] = pattern:sub(i, i+1)
                i = i + 2
            elseif pattern:sub(i+1, i+1) == '-' and i + 2 < last then
                -- 'x-y' is a range unless 'y' would be the closing bracket
                tmp[#tmp+1] = both_case_range(char, pattern:sub(i+2, i+2))
                i = i + 3
            else
                if char:match("%a") then        -- single letter
                    char = char:lower()..char:upper()
                end
                tmp[#tmp+1] = char
                i = i + 1
            end
        end
        tmp[#tmp+1] = pattern:sub(last, last)   -- ']' or '' when unclosed
        return table.concat(tmp)
    end

    -- Build a case-insensitive version of a Lua pattern.
    --
    -- Letters outside bracket sets become "[xX]" classes. '%'-escaped items
    -- and '%bxy' are copied unchanged. Bracket sets are copied verbatim unless
    -- `brackets` is truthy, in which case their letters and letter ranges are
    -- extended to both cases ("[ab]" -> "[aAbB]", "[a-c]" -> "[a-cA-C]").
    --
    -- @param pattern   the Lua pattern string
    -- @param brackets  truthy to make bracket sets case-insensitive as well
    -- @return          the converted pattern string
    function iPattern(pattern, brackets)
        -- lets string.find raise on a malformed pattern; only the part of the
        -- pattern the matcher actually reaches gets checked this way
        ('sanity check'):find(pattern)
        local tmp = {}
        local n = #pattern
        local i = 1
        while i <= n do
            local char = pattern:sub(i, i)          -- current char
            if char == '%' then
                if pattern:sub(i+1, i+1) == 'b' then
                    tmp[#tmp+1] = pattern:sub(i, i+3)   -- '%bxy' keeps x and y
                    i = i + 4
                else
                    tmp[#tmp+1] = pattern:sub(i, i+1)   -- '%x'
                    i = i + 2
                end
            elseif char == '[' then                 -- bracket set
                local last = closing_bracket(pattern, i)
                if brackets then
                    tmp[#tmp+1] = both_case_set(pattern, i, last)
                else
                    tmp[#tmp+1] = pattern:sub(i, last)
                end
                i = last + 1
            else
                if char:match("%a") then            -- bare letter
                    char = '['..char:lower()..char:upper()..']'
                end
                tmp[#tmp+1] = char
                i = i + 1
            end
        end
        return table.concat(tmp)
    end
    
    -- Demo input mixing a bracket set, escapes and a '%b' item.
    local sample = '[ab%c%]d%%]+ o%%r %bnm'
    -- Sets untouched; output given in the original answer:
    --   [ab%c%]d%%]+ [oO]%%[rR] %bnm
    print(iPattern(sample))
    -- Sets folded too; output given in the original answer:
    --   [aAbB%c%]dD%%]+ [oO]%%[rR] %bnm
    print(iPattern(sample, true))
    -- Matching with the folded pattern; output given in the original answer:
    --   %D]% O%r n---m
    local subject = 'qwe [%D]% O%r n---m asd'
    print(subject:match(iPattern(sample, true)))
    
    0 讨论(0)
提交回复
热议问题