elasticsearch mapping tokenizer keyword to avoid splitting tokens and enable use of wildcard

后端 未结 1 1149
无人及你
无人及你 2021-01-02 01:17

I am trying to build an autocomplete function with AngularJS and Elasticsearch on a given field, for example countryname. It can contain simple names like "France", …

相关标签:
1条回答
  • 2021-01-02 01:55

    I found a working solution: the "keyword" tokenizer. Create a custom analyzer and use it in the mapping for each field that should be kept as a single token instead of being split on whitespace:

        curl -XPUT 'localhost:9200/botanic/' -d '{
     "settings":{
         "index":{
            "analysis":{
               "analyzer":{
                  "keylower":{
                     "tokenizer":"keyword",
                     "filter":"lowercase"
                  }
               }
            }
         }
      },
      "mappings":{
            "specimens" : {
                "_all" : {"enabled" : true},
                "_index" : {"enabled" : true},
                "_id" : {"index": "not_analyzed", "store" : false},
                "properties" : {
                    "_id" : {"type" : "string", "store" : "no","index": "not_analyzed"  } ,
                ...
                    "LOCATIONID" : {"type" : "string",  "store" : "yes","index": "not_analyzed" } ,
                    "AVERAGEALTITUDEROUNDED" : {"type" : "string",  "store" : "yes","index": "analyzed" } ,
                    "CONTINENT" : {"type" : "string","analyzer":"keylower" } ,
                    "COUNTRYNAME" : {"type" : "string","analyzer":"keylower" } ,                
                    "COUNTRYCODE" : {"type" : "string", "store" : "yes","index": "analyzed" } ,
                    "COUNTY" : {"type" : "string","analyzer":"keylower" } ,
                    "LOCALITY" : {"type" : "string","analyzer":"keylower" }                 
                }
            }
        }
    }'
    

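    Now I can use a wildcard in a query on the COUNTRYNAME field, which is no longer split into separate tokens (the pattern is written in lowercase because the analyzer's lowercase filter lowercases the indexed value):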

    curl -XGET 'localhost:9200/botanic/specimens/_search?size=10' -d '{
    "fields"  : ["COUNTRYNAME"],     
    "query": {"query_string" : {
                        "query": "COUNTRYNAME:bol*"
    }},
    "aggs" : {
        "general" : {
            "terms" : {
                "field" : "COUNTRYNAME", "size":0
            }
        }
    }}'
    

    The result:

    {
        "took" : 14,
        "timed_out" : false,
        "_shards" : {
            "total" : 5,
            "successful" : 5,
            "failed" : 0
        },
        "hits" : {
            "total" : 45,
            "max_score" : 1.0,
            "hits" : [{
                    "_index" : "botanic",
                    "_type" : "specimens",
                    "_id" : "91E7B53B61DF4E76BF70C780315A5DFD",
                    "_score" : 1.0,
                    "fields" : {
                        "COUNTRYNAME" : ["Bolivia, Plurinational State of"]
                    }
                }, {
                    "_index" : "botanic",
                    "_type" : "specimens",
                    "_id" : "7D811B5D08FF4F17BA174A3D294B5986",
                    "_score" : 1.0,
                    "fields" : {
                        "COUNTRYNAME" : ["Bolivia, Plurinational State of"]
                    }
                } ...
            ]
        },
        "aggregations" : {
            "general" : {
                "buckets" : [{
                        "key" : "bolivia, plurinational state of",
                        "doc_count" : 45
                    }
                ]
            }
        }
    }
    
    0 讨论(0)
提交回复
热议问题