知识点正则

package main

import (

    "fmt"

    "regexp"

// main demonstrates whole-string validation against a character class.
//
// The pattern is anchored with ^ and $ so that every character of the input
// must be a lowercase letter, a digit, or one of # $ % &. Without the anchors
// MatchString only reports whether the input contains a match somewhere, so
// "AIh" would be accepted because of its trailing "h".
func main() {
	reg, err := regexp.Compile("^[a-z0-9#$%&]+$")
	if err != nil {
		fmt.Println(err)
		return // reg is nil when compilation fails and must not be used
	}

	fmt.Println(reg.MatchString("AIh"))    // false: 'A' and 'I' are outside the class
	fmt.Println(reg.MatchString("an82&#")) // true: every character is inside the class
}

运行结果：

false

true

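第一个字符串AIh不匹配，第二个an82&#匹配。需要注意：函数(re *Regexp) MatchString(s string) bool判断的是字符串中是否“包含”能与正则匹配的子串，并不会自动要求每一个字符都匹配；要让每一个字符都被检验是否属于[a-z0-9#$%&]其中的一个，必须在正则两端加上锚点，写成^[a-z0-9#$%&]+$。

a-z表示从小写a到小写z的26个英文字母，0-9表示从0到9的10个数字，#$%&是四个特殊字符，AIh中有两个大写字母，一个小写字母，h属于a-z，但字母A和I都不属于a-z，

也不属于0-9，也不属于特殊字符，所以在带锚点的写法^[a-z0-9#$%&]+$下第一个不匹配（输出false）：此时只要一段内容中有一个字符不属于该字符集，就表示该段内容不匹配。如果不加锚点，只写[a-z0-9#$%&]+，AIh中的h本身就构成一个匹配，结果会是true。中括号外面的加号+表示前面的字符集连续出现一次或多次，

配合^和$才是要求每一个字符都属于小写字母或数字，或四个特殊字符中的一个；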

[a-z0-7#$%&]去掉加号且不加^和$锚点（注意数字范围也改成了0-7），表示某个字符串中只要有一个字符匹配，就表示匹配，每一个字符都不匹配，才表示不匹配。起决定作用的是没有锚点，而不是有没有加号。

// main shows that an unanchored character class matches as soon as any single
// character of the input belongs to the class; MatchString is false only when
// no character belongs to it.
//
// The snippet needs "fmt", "regexp" and "strconv" to be imported.
func main() {
	reg, err := regexp.Compile("[a-z0-7#$%&]")
	if err != nil {
		fmt.Println(err)
		return // reg is nil when compilation fails and must not be used
	}

	fmt.Println(reg.MatchString("AI"))              // false: no character in the class
	fmt.Println(reg.MatchString("an82&#"))          // true
	fmt.Println(reg.MatchString("A!+"))             // false: '!' and '+' are not in the class
	fmt.Println(reg.MatchString("aA!+"))            // true: 'a' is in the class
	fmt.Println(reg.MatchString(strconv.Itoa(8)))   // false: the digit range stops at 7
	fmt.Println(reg.MatchString(strconv.Itoa(789))) // true: '7' is in the class
}

运行结果：

false

true

false

true

false

true

regexp.MustCompile函数的用法

该函数比regexp.Compile少一个返回值error，除此之外用法一样；区别在于正则表达式不合法时MustCompile不会返回错误，而是直接panic，因此适合用于写死在代码里的固定表达式。

package main

import (

    "fmt"

    "regexp"

    "strconv"

// main checks a list of sample strings against an anchored character class
// and prints, for each one, its index, its text and whether it matched.
//
// Because the pattern is wrapped in ^...$, a sample matches only when every
// one of its characters belongs to the class.
func main() {
	samples := []string{
		"日本",
		"中国",
		"ad",
		"G",
		strconv.Itoa(9),
		strconv.Itoa(708),
		"@",
		"国8h+¥œ",
		"%",
		"^",
	}

	// MustCompile panics on an invalid pattern, which is acceptable for a
	// fixed literal like this one.
	reg := regexp.MustCompile("^[a-zA-Z0-8中国!@#&*+_¥œø]+$")

	for k, v := range samples {
		fmt.Println(k, v, reg.MatchString(v))
	}
}

运行结果：

0 日本 false

1 中国 true

2 ad true

3 G true

4 9 false

5 708 true

6 @ true

7 国8h+¥œ true

8 % false

9 ^ false

函数Compile(expr string) (*Regexp, error)和MustCompile(str string) *Regexp的参数是正则表达式；

函数(re *Regexp) MatchString(s string) bool的参数是需要检验的内容，返回值表示该内容中是否存在与正则表达式匹配的部分。

匹配中文

正则表达式"^[a-zA-Z0-9\u4e00-\u9fa5]{3,8}$"，匹配小写字母、大写字母、数字、或中文，长度3到8位。

package main

import (

    "fmt"

    "regexp"

// main validates sample strings that must consist of 3 to 8 characters, each
// being an ASCII letter, an ASCII digit or a character in U+4E00..U+9FA5.
//
// The \u4e00 and \u9fa5 escapes sit inside an interpreted (double-quoted)
// string literal, so the Go compiler turns them into the actual characters
// before the regexp package sees the pattern.
func main() {
	reg, err := regexp.Compile("^[a-zA-Z0-9\u4e00-\u9fa5]{3,8}$")
	if err != nil {
		fmt.Println(err)
		return // reg is nil when compilation fails and must not be used
	}

	fmt.Println(reg.MatchString("春暖花开"))     // true: 4 characters
	fmt.Println(reg.MatchString("春暖"))       // false: only 2 characters
	fmt.Println(reg.MatchString("568"))      // true
	fmt.Println(reg.MatchString("aingege"))  // true
	fmt.Println(reg.MatchString("EIOGNE"))   // true
	fmt.Println(reg.MatchString("DIfin梅6")) // true: mixed letters, Chinese and digit
}

运行结果：

true

false

true

true

true

true

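函数Compile和MustCompile的参数是普通的Go字符串：既可以写在英文双引号里面，也可以写在反引号 ` 里面（原始字符串，下文的例子就使用了反引号），但不可以是单引号（单引号表示单个字符rune）。需要注意：在反引号中\u4e00这类转义不会被Go编译器转换成汉字，而Go的正则语法本身不支持\u转义，此时应改写为\x{4e00}-\x{9fa5}。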

package main

import "bytes"

import "fmt"

import "regexp"

// main walks through the most commonly used entry points of the regexp
// package with the pattern p([a-z]+)ch, printing one numbered line per call.
func main() {
	const pattern = "p([a-z]+)ch"

	// 1. Test whether a string matches a pattern without compiling it first.
	match, err := regexp.MatchString(pattern, "peach")
	if err != nil {
		fmt.Println(err)
		return
	}
	fmt.Println("1.", match)

	// For anything beyond a one-off test, compile the pattern into a
	// reusable *Regexp value.
	r, err := regexp.Compile(pattern)
	if err != nil {
		fmt.Println(err)
		return
	}

	// 2. The compiled value has many methods; this is the same match test as above.
	fmt.Println("2.", r.MatchString("peach"))

	// 3. Find the text of the first match.
	fmt.Println("3.", r.FindString("peach punch"))

	// 4. Find the first match too, but return its start and end byte
	// indexes instead of the matched text.
	fmt.Println("4.", r.FindStringIndex("peach punch"))

	// 5. Submatch variants return the whole match followed by each capture
	// group; here that is the text for p([a-z]+)ch and for ([a-z]+).
	fmt.Println("5.", r.FindStringSubmatch("peach punch"))

	// 6. Likewise, but returning index pairs for the whole match and each group.
	fmt.Println("6.", r.FindStringSubmatchIndex("peach punch"))

	// 7. All variants return every match rather than only the first one.
	fmt.Println("7.", r.FindAllString("peach punch pinch", -1))

	// 8. All can be combined with the other variants shown above.
	fmt.Println("8.", r.FindAllStringSubmatchIndex("peach punch pinch", -1))

	// 9. A non-negative second argument limits the number of matches returned.
	fmt.Println("9.", r.FindAllString("peach punch pinch", 2))

	// 10. The examples above take string arguments and have String in their
	// names; dropping String from the name gives the []byte version.
	fmt.Println("10.", r.Match([]byte("peach")))

	// 11. MustCompile returns a single value (and panics on a bad pattern),
	// so unlike Compile it can be used directly in a variable initializer.
	r = regexp.MustCompile(pattern)
	fmt.Println("11.", r)

	// 12. The regexp package can also replace matched text with other values.
	fmt.Println("12.", r.ReplaceAllString("a peach", "<fruit>"))

	// 13. Func variants pass each match to the given function and use its
	// result as the replacement.
	in := []byte("a peach")
	out := r.ReplaceAllFunc(in, bytes.ToUpper)
	fmt.Println("13.", string(out))
}

运行结果：

1. true

2. true

3. peach

4. [0 5]

5. [peach ea]

6. [0 5 1 3]

7. [peach punch pinch]

8. [[0 5 1 3] [6 11 7 9] [12 17 13 15]]

9. [peach punch]

10. true

11. p([a-z]+)ch

12. a <fruit>

13. a PEACH

我们期望在字符串  1000abcd123  中找出前后两个数字。

例子1：匹配到这个字符串的例子

package main

import(

    "fmt"

    "regexp"

// digitsRegexp captures two runs of digits separated by at least one
// non-digit character. It is compiled once at package initialization.
var digitsRegexp = regexp.MustCompile(`(\d+)\D+(\d+)`)

// main prints the whole match followed by the leading and trailing numbers
// found in the sample string.
func main() {
	someString := "1000abcd123"
	fmt.Println(digitsRegexp.FindStringSubmatch(someString))
}

上面代码输出：

[1000abcd123 1000 123]

例子2：使用带命名的正则表达式

package main

import(

    "fmt"

    "regexp"

// myExp matches three runs of digits separated by literal dots. The first and
// last runs are captured as the named groups "first" and "second"; the middle
// run is an unnamed group.
//
// Both separators are escaped: an unescaped '.' would match any character.
var myExp = regexp.MustCompile(`(?P<first>\d+)\.(\d+)\.(?P<second>\d+)`)

// main prints the whole match followed by every capture group, named or not.
func main() {
	fmt.Printf("%+v", myExp.FindStringSubmatch("1234.5678.9"))
}

上面代码输出，所有匹配到的都输出了：

[1234.5678.9 1234 5678 9]

这里的Named capturing groups  (?P<name>) 命名方式源自 python，Go语言的正则（RE2语法）也采用了这种写法；java、c# 使用的是 (?<name>) 命名方式，Go 从 1.22 版本开始也同时支持 (?<name>) 写法。

例子3：对正则表达式类扩展一个获得所有命名信息的方法，并使用它。

package main

import(

    "fmt"

    "regexp"

// myRegexp embeds *regexp.Regexp so that extra helper methods can be added
// while every method of the embedded type stays available.
type myRegexp struct {
	*regexp.Regexp
}

// FindStringSubmatchMap returns the named capture groups of the first match
// of r in s, keyed by group name. The whole match and unnamed groups are
// skipped. When s does not match, the returned map is empty (never nil).
func (r *myRegexp) FindStringSubmatchMap(s string) map[string]string {
	captures := make(map[string]string)

	match := r.FindStringSubmatch(s)
	if match == nil {
		return captures
	}

	for i, name := range r.SubexpNames() {
		// Index 0 is the whole match; unnamed groups have an empty name.
		if i == 0 || name == "" {
			continue
		}
		captures[name] = match[i]
	}
	return captures
}

// myExp matches three runs of digits separated by literal dots and names the
// first and last runs "first" and "second". Both separators are escaped: an
// unescaped '.' would match any character.
var myExp = myRegexp{regexp.MustCompile(`(?P<first>\d+)\.(\d+)\.(?P<second>\d+)`)}

// main prints the map of named groups and then the value captured as "first".
func main() {
	mmap := myExp.FindStringSubmatchMap("1234.5678.9")
	ww := mmap["first"]
	fmt.Println(mmap)
	fmt.Println(ww)
}

上面代码的输出结果：

map[first:1234 second:9]

1234

例子4，抓取限号信息，并记录到一个Map中。

package main

import(

    "fmt"

    iconv "github.com/djimenez/iconv-go"

    "io/ioutil"

    "net/http"

    "os"

    "regexp"

// embed regexp.Regexp in a new type so we can extend it

type myRegexp struct{

    *regexp.Regexp

// add a new method to our new regular expression type

func(r *myRegexp)FindStringSubmatchMap(s string)[](map[string]string){

    captures:=make([](map[string]string),0)

    matches:=r.FindAllStringSubmatch(s,-1)

    if matches==nil{

        return captures

    names:=r.SubexpNames()

    for _,match:=range matches{

        cmap:=make(map[string]string)

        for pos,val:=range match{

            name:=names[pos]

            if name==""{

                continue

/*

                fmt.Println("+++++++++")

                fmt.Println(name)

                fmt.Println(val)

*/

            cmap[name]=val

        captures=append(captures,cmap)

    return captures

// 抓取限号信息的正则表达式

var myExp=myRegexp{regexp.MustCompile(`自(?P<byear>[\d]{4})年(?P<bmonth>[\d]{1,2})月(?P<bday>[\d]{1,2})日至(?P<eyear>[\d]{4})年(?P<emonth>[\d]{1,2})月(?P<eday>[\d]{1,2})日，星期一至星期五限行机动车车牌尾号分别为：(?P<n11>[\d])和(?P<n12>[\d])、(?P<n21>[\d])和(?P<n22>[\d])、(?P<n31>[\d])和(?P<n32>[\d])、(?P<n41>[\d])和(?P<n42>[\d])、(?P<n51>[\d])和(?P<n52>[\d])`)}

// ErrorAndExit writes err to standard error and terminates the process with
// exit status 1. It does not return.
func ErrorAndExit(err error) {
	fmt.Fprintln(os.Stderr, err)
	os.Exit(1)
}

func main(){

    response,err:=http.Get("http://www.bjjtgl.gov.cn/zhuanti/10weihao/index.html")

    defer response.Body.Close()

    if err!=nil{

        ErrorAndExit(err)

    input,err:=ioutil.ReadAll(response.Body)

    if err!=nil{

        ErrorAndExit(err)

    body :=make([]byte,len(input))

    iconv.Convert(input,body,"gb2312","utf-8")

    mmap:=myExp.FindStringSubmatchMap(string(body))

    fmt.Println(mmap)

上述代码输出：

[map[n32:0 n22:9 emonth:7 n11:3 n41:1 n21:4 n52:7 bmonth:4 n51:2 bday:9 n42:6 byear:2012 eday:7 eyear:2012 n12:8 n31:5]

map[emonth:10 n41:5 n52:6 n31:4 byear:2012 n51:1 eyear:2012 n32:9 bmonth:7 n22:8 bday:8 n11:2 eday:6 n42:0 n21:3 n12:7]

map[bday:7 n51:5 n22:7 n31:3 eday:5 n32:8 byear:2012 bmonth:10 emonth:1 eyear:2013 n11:1 n12:6 n52:0 n21:2 n42:9 n41:4]

map[eyear:2013 byear:2013 n22:6 eday:10 bmonth:1 n41:3 n32:7 n31:2 n21:1 n11:5 bday:6 n12:0 n51:4 n42:8 emonth:4 n52:9]]

这段代码首先下载北京市交管局的网页；然后把这个gb2312的页面转换成utf-8编码，然后用正则表达式提取其中的限号信息。

来源：https://www.cnblogs.com/chu-12345/p/11378806.html

标签

string

python正则

字符串函数

main函数

python字符串

知识点 正则