Getting the closest string match

后端 未结 13 716
难免孤独
难免孤独 2020-11-22 10:57

I need a way to compare multiple strings to a test string and return the string that closely resembles it:

TEST STRING: THE BROWN FOX JUMPED OVER THE RED COW         


        
13条回答
  •  清酒与你
    2020-11-22 11:30

    Here you can have a golang POC for calculate the distances between the given words. You can tune the minDistance and difference for other scopes.

    Playground: https://play.golang.org/p/NtrBzLdC3rE

    package main
    
    import (
        "errors"
        "fmt"
        "log"
        "math"
        "strings"
    )
    
    var data string = `THE RED COW JUMPED OVER THE GREEN CHICKEN-THE RED COW JUMPED OVER THE RED COW-THE RED FOX JUMPED OVER THE BROWN COW`
    
    const minDistance float64 = 2
    const difference float64 = 1
    
    type word struct {
        data    string
        letters map[rune]int
    }
    
    type words struct {
        words []word
    }
    
    // Print prettify the data present in word
    func (w word) Print() {
        var (
            lenght int
            c      int
            i      int
            key    rune
        )
        fmt.Printf("Data: %s\n", w.data)
        lenght = len(w.letters) - 1
        c = 0
        for key, i = range w.letters {
            fmt.Printf("%s:%d", string(key), i)
            if c != lenght {
                fmt.Printf(" | ")
            }
            c++
        }
        fmt.Printf("\n")
    }
    
    func (ws words) fuzzySearch(data string) ([]word, error) {
        var (
            w      word
            err    error
            founds []word
        )
        w, err = initWord(data)
        if err != nil {
            log.Printf("Errors: %s\n", err.Error())
            return nil, err
        }
        // Iterating all the words
        for i := range ws.words {
            letters := ws.words[i].letters
            //
            var similar float64 = 0
            // Iterating the letters of the input data
            for key := range w.letters {
                if val, ok := letters[key]; ok {
                    if math.Abs(float64(val-w.letters[key])) <= minDistance {
                        similar += float64(val)
                    }
                }
            }
    
            lenSimilarity := math.Abs(similar - float64(len(data)-strings.Count(data, " ")))
            log.Printf("Comparing %s with %s i've found %f similar letter, with weight %f", data, ws.words[i].data, similar, lenSimilarity)
            if lenSimilarity <= difference {
                founds = append(founds, ws.words[i])
            }
        }
    
        if len(founds) == 0 {
            return nil, errors.New("no similar found for data: " + data)
        }
    
        return founds, nil
    }
    
    func initWords(data []string) []word {
        var (
            err   error
            words []word
            word  word
        )
        for i := range data {
            word, err = initWord(data[i])
            if err != nil {
                log.Printf("Error in index [%d] for data: %s", i, data[i])
            } else {
                words = append(words, word)
            }
        }
        return words
    
    }
    
    func initWord(data string) (word, error) {
        var word word
    
        word.data = data
        word.letters = make(map[rune]int)
        for _, r := range data {
            if r != 32 { // avoid to save the whitespace
                word.letters[r]++
            }
    
        }
        return word, nil
    }
    func main() {
        var ws words
        words := initWords(strings.Split(data, "-"))
        for i := range words {
            words[i].Print()
        }
        ws.words = words
    
        solution, _ := ws.fuzzySearch("THE BROWN FOX JUMPED OVER THE RED COW")
        fmt.Println("Possible solutions: ", solution)
    
    }
    
    

提交回复
热议问题