Encode / decode URLs

后端 未结 8 1484
攒了一身酷
攒了一身酷 2020-11-28 03:46

What's the recommended way of encoding and decoding entire URLs in Go? I am aware of the methods url.QueryEscape and url.QueryUnescape, but they d

相关标签:
8条回答
  • 2020-11-28 04:17

    Here's an implementation of escape and unescape (ripped from go source):

    package main
    
    
    import (  
        "fmt"
        "strconv"
    )
    
    
    // encoding selects which part of a URL a string belongs to; the escaping
    // rules differ per part.
    //
    // EscapeError carries the offending text of a malformed percent-escape.
    type (
        encoding    int
        EscapeError string
    )

    // The escaping modes, numbered consecutively starting at 1.
    const (
        encodePath encoding = iota + 1
        encodeHost
        encodeUserPassword
        encodeQueryComponent
        encodeFragment
    )

    // Error implements the error interface: it returns a fixed prefix followed
    // by the quoted escape sequence that could not be decoded.
    func (e EscapeError) Error() string {
        quoted := strconv.Quote(string(e))
        return "invalid URL escape " + quoted
    }
    
    
    // ishex reports whether c is an ASCII hexadecimal digit
    // (0-9, a-f or A-F).
    func ishex(c byte) bool {
        isDigit := '0' <= c && c <= '9'
        isLower := 'a' <= c && c <= 'f'
        isUpper := 'A' <= c && c <= 'F'
        return isDigit || isLower || isUpper
    }
    
    // unhex converts one ASCII hexadecimal digit to its numeric value (0-15).
    // Any byte that is not a hexadecimal digit yields 0.
    func unhex(c byte) byte {
        if '0' <= c && c <= '9' {
            return c - '0'
        }
        if 'a' <= c && c <= 'f' {
            return 10 + (c - 'a')
        }
        if 'A' <= c && c <= 'F' {
            return 10 + (c - 'A')
        }
        return 0
    }
    
    
    
    // shouldEscape reports whether byte c has to be percent-encoded when it
    // appears in the part of a URL selected by mode, following RFC 3986.
    //
    // Known limitation: not every reserved character is classified
    // correctly yet; see golang.org/issue/5684.
    func shouldEscape(c byte, mode encoding) bool {
        // §2.3 unreserved characters (letters, digits and the four marks)
        // are never escaped, whatever the mode.
        switch {
        case 'a' <= c && c <= 'z', 'A' <= c && c <= 'Z', '0' <= c && c <= '9':
            return false
        case c == '-', c == '_', c == '.', c == '~':
            return false
        }

        // §3.2.2: a host (reg-name) may carry the sub-delims
        //  "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
        // unescaped. ':' is accepted as well because :port is treated as part
        // of the host, and '[' ']' because [ipv6]:port is too.
        if mode == encodeHost {
            switch c {
            case '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=', ':', '[', ']':
                return false
            }
        }

        // Bytes outside the §2.2 reserved set are escaped in every mode.
        switch c {
        case '$', '&', '+', ',', '/', ':', ';', '=', '?', '@':
            // Reserved: the answer depends on the mode, decided below.
        default:
            return true
        }

        // c is a reserved character; each URL section lets a different
        // subset of them through unescaped.
        switch mode {
        case encodePath: // §3.3
            // The RFC allows : @ & = + $ and keeps / ; , for giving meaning
            // to individual path segments. The path is handled as a whole
            // here, so those pass too and only '?' needs escaping.
            return c == '?'

        case encodeUserPassword: // §3.2.1
            // The RFC allows ; : & = + $ , in userinfo, which leaves
            // '@', '/' and '?'. ':' is escaped as well because userinfo
            // parsing treats it as the user/password separator.
            switch c {
            case '@', '/', '?', ':':
                return true
            }
            return false

        case encodeFragment: // §4.1
            // The RFC text is silent but the grammar allows everything,
            // so nothing is escaped.
            return false
        }

        // Query components (§3.4) escape every reserved character, and so
        // does any mode that has no special rule above.
        return true
    }
    
    
    
    
    // escape percent-encodes s according to the rules of the URL section
    // selected by mode. Bytes for which shouldEscape reports true are written
    // as %XX with upper-case hex digits; in query-component mode a space is
    // written as '+' instead. When nothing needs encoding, s is returned
    // unchanged.
    func escape(s string, mode encoding) string {
        const upperHex = "0123456789ABCDEF"
        plusForSpace := mode == encodeQueryComponent

        // First pass: find out whether any work is needed and how much the
        // output grows (each %XX escape adds two bytes).
        spaces, escapes := 0, 0
        for i := 0; i < len(s); i++ {
            b := s[i]
            if !shouldEscape(b, mode) {
                continue
            }
            if plusForSpace && b == ' ' {
                spaces++
                continue
            }
            escapes++
        }
        if spaces+escapes == 0 {
            return s
        }

        // Second pass: build the encoded form.
        out := make([]byte, 0, len(s)+2*escapes)
        for i := 0; i < len(s); i++ {
            b := s[i]
            if plusForSpace && b == ' ' {
                out = append(out, '+')
                continue
            }
            if shouldEscape(b, mode) {
                out = append(out, '%', upperHex[b>>4], upperHex[b&15])
                continue
            }
            out = append(out, b)
        }
        return string(out)
    }
    
    
    // unescape decodes the percent-escapes in s. The mode names the URL
    // section being decoded; in query-component mode every '+' is additionally
    // turned into a space. A '%' that is not followed by two hexadecimal
    // digits makes the call fail with an EscapeError holding the offending
    // text (at most three bytes).
    func unescape(s string, mode encoding) (string, error) {
        plusIsSpace := mode == encodeQueryComponent

        // Validation pass: count the escapes and reject malformed ones.
        escapes := 0
        sawPlus := false
        for i := 0; i < len(s); {
            c := s[i]
            if c == '%' {
                if i+2 >= len(s) || !ishex(s[i+1]) || !ishex(s[i+2]) {
                    bad := s[i:]
                    if len(bad) > 3 {
                        bad = bad[:3]
                    }
                    return "", EscapeError(bad)
                }
                escapes++
                i += 3
                continue
            }
            if c == '+' && plusIsSpace {
                sawPlus = true
            }
            i++
        }

        // Nothing to rewrite: hand the input back as is.
        if escapes == 0 && !sawPlus {
            return s, nil
        }

        // Decoding pass: every %XX collapses to one byte, so the result is
        // two bytes shorter per escape.
        out := make([]byte, 0, len(s)-2*escapes)
        for i := 0; i < len(s); i++ {
            switch c := s[i]; {
            case c == '%':
                out = append(out, unhex(s[i+1])<<4|unhex(s[i+2]))
                i += 2 // skip the two hex digits; the loop skips the '%'
            case c == '+' && plusIsSpace:
                out = append(out, ' ')
            default:
                out = append(out, c)
            }
        }
        return string(out), nil
    }
    
    
    // EncodeUriComponent percent-encodes rawString so it can be embedded as a
    // single URI component, and returns the encoded text.
    //
    // The string is escaped in query-component mode, the same mode
    // DecodeUriCompontent decodes with, so the two functions round-trip.
    // Fragment mode would leave the reserved characters
    // $ & + , / : ; = ? @ untouched, and a literal '+' would then come back
    // from DecodeUriCompontent as a space.
    //
    // Note that in this mode a space is written as '+', not "%20".
    func EncodeUriComponent(rawString string) string {
        return escape(rawString, encodeQueryComponent)
    }
    
    // DecodeUriCompontent decodes a percent-encoded URI component using
    // query-component rules: every %XX escape becomes the byte it names and
    // every '+' becomes a space. It returns the error reported by unescape
    // when the input contains a malformed escape.
    //
    // (The misspelling in the name is kept so existing callers still compile.)
    func DecodeUriCompontent(encoded string) (string, error) {
        decoded, err := unescape(encoded, encodeQueryComponent)
        return decoded, err
    }
    
    
    // https://golang.org/src/net/url/url.go
    // http://remove-line-numbers.ruurtjan.com/
    // main demonstrates a round trip: it encodes a sample string, prints the
    // encoded form, decodes it again and prints the result.
    func main() {
        // Sample with non-ASCII letters, a slash and a space. An online
        // encoder for comparison: http://www.url-encode-decode.com/
        origin := "äöüHel/lo world"
        encoded := EncodeUriComponent(origin)
        fmt.Println(encoded)

        // Decoding can fail on malformed escapes; report it instead of
        // silently printing an empty string.
        decoded, err := DecodeUriCompontent(encoded)
        if err != nil {
            fmt.Println("decode failed:", err)
            return
        }
        fmt.Println(decoded)
    }
    


    // -------------------------------------------------------
    
    /*
    func UrlEncoded(str string) (string, error) {
        u, err := url.Parse(str)
        if err != nil {
            return "", err
        }
        return u.String(), nil
    }
    
    
    // http://stackoverflow.com/questions/13820280/encode-decode-urls
    // import "net/url"
    func old_main() {
        a,err := UrlEncoded("hello world")
        if err != nil {
               fmt.Println(err)
        }
        fmt.Println(a)
    
        // https://gobyexample.com/url-parsing
        //s := "postgres://user:pass@host.com:5432/path?k=v#f"
        s := "postgres://user:pass@host.com:5432/path?k=vbla%23fooa#f"
        u, err := url.Parse(s)
        if err != nil {
            panic(err)
        }
    
    
        fmt.Println(u.RawQuery)
        fmt.Println(u.Fragment)
        fmt.Println(u.String())
        m, _ := url.ParseQuery(u.RawQuery)
        fmt.Println(m)
        fmt.Println(m["k"][0])
    
    }
    */
    
    // -------------------------------------------------------
    
    0 讨论(0)
  • 2020-11-28 04:23

    From MDN on encodeURIComponent:

    encodeURIComponent escapes all characters except the following: alphabetic characters, decimal digits, and '-', '_', '.', '!', '~', '*', "'", '(', ')'.

    Go's implementation of url.QueryEscape (specifically, the private shouldEscape function) escapes all characters except the following: alphabetic characters, decimal digits, '-', '_', '.', '~'.

    Unlike JavaScript, Go's QueryEscape() also escapes '!', '*', "'", '(' and ')'. In other words, Go's version is strictly RFC 3986 compliant, while JavaScript's is looser. Again from MDN:

    If one wishes to be more stringent in adhering to RFC 3986 (which reserves !, ', (, ), and *), even though these characters have no formalized URI delimiting uses, the following can be safely used:

    // Stricter variant of encodeURIComponent: additionally percent-encodes
    // the characters ! ' ( ) * which RFC 3986 reserves but
    // encodeURIComponent leaves as they are.
    function fixedEncodeURIComponent (str) {
      // Build the %XX form by hand (upper-case hex) instead of relying on
      // the deprecated global escape().
      return encodeURIComponent(str).replace(/[!'()*]/g, function (ch) {
        return "%" + ch.charCodeAt(0).toString(16).toUpperCase();
      });
    }
    
    0 讨论(0)
  • 2020-11-28 04:29

    You can do all the URL encoding you want with the net/url package. It doesn't break out the individual encoding functions for the parts of the URL; you have to let it construct the whole URL. Having had a look at the source code, I think it does a very good, standards-compliant job.

    Here is an example (playground link)

    package main
    
    import (
        "fmt"
        "net/url"
    )
    
    // main builds a URL piece by piece and prints its encoded form, letting
    // net/url apply the correct escaping to each part.
    func main() {
        u, err := url.Parse("http://www.example.com")
        if err != nil {
            // Surface the actual parse error rather than a fixed message.
            panic(err)
        }

        // Path holds the unescaped path, so the '?' added here is
        // percent-encoded when the URL is rendered.
        u.Path += "/some/path/or/other_with_funny_characters?_or_not/"

        // Values collects the query parameters (a key may repeat) and
        // Encode renders them as the raw query string.
        params := url.Values{}
        params.Add("hello", "42")
        params.Add("hello", "54")
        params.Add("vegetable", "potato")
        u.RawQuery = params.Encode()

        fmt.Printf("Encoded URL is %q\n", u.String())
    }
    

    Which prints

    Encoded URL is "http://www.example.com/some/path/or/other_with_funny_characters%3F_or_not/?hello=42&hello=54&vegetable=potato"
    
    0 讨论(0)
  • 2020-11-28 04:31

    For mimicking Javascript's encodeURIComponent(), I created a string helper function.

    Example: Turns "My String" to "My%20String"

    https://github.com/mrap/stringutil/blob/master/urlencode.go

    import "net/url"
    
    // UrlEncoded parses str as a URL and returns it re-serialized, with the
    // escaping net/url applies when rendering a URL (for example
    // "My String" becomes "My%20String"). If str cannot be parsed, the parse
    // error is returned together with an empty string.
    //
    // Because str is treated as a whole URL, characters such as '?', '#',
    // '&' and '=' keep their URL meaning and are not escaped the way
    // JavaScript's encodeURIComponent() would escape them.
    func UrlEncoded(str string) (string, error) {
        parsed, parseErr := url.Parse(str)
        if parseErr != nil {
            return "", parseErr
        }
        encoded := parsed.String()
        return encoded, nil
    }
    
    0 讨论(0)
  • 2020-11-28 04:32

    Hope this helps

     // url encoded
    // UrlEncodedISO parses str as a URL and returns only its query
    // parameters, re-encoded by url.Values.Encode. Input without a query part
    // therefore yields an empty string. If str cannot be parsed, the parse
    // error is returned together with an empty string.
    func UrlEncodedISO(str string) (string, error) {
        parsed, err := url.Parse(str)
        if err != nil {
            return "", err
        }
        return parsed.Query().Encode(), nil
    }
    
     * encoded into %2A
     # encoded into %23
     % encoded into %25
     < encoded into %3C
     > encoded into %3E
     + encoded into %2B
     enter key (#13#10) is encoded into %0D%0A
    
    0 讨论(0)
  • 2020-11-28 04:33

    As of Go 1.8, this situation has changed. We now have access to PathEscape in addition to the older QueryEscape to encode path components, along with the unescape counterpart PathUnescape.

    0 讨论(0)
提交回复
热议问题