ES 拼音前缀匹配的一种方案

南笙酒味 提交于 2020-03-08 22:20:39
环境:Elasticsearch 7.1.1(需要安装 elasticsearch-analysis-pinyin 拼音插件和 IK 分词插件)
# Create index "pinyin2": two analyzers built on "pinyin"-type tokenizers
# (one with keep_first_letter, one with keep_joined_full_pinyin) and a mapping
# that gives the "keyword" object one text sub-field per analyzer.
PUT /pinyin2
{
  "settings": {
    "number_of_shards": 1,
    "number_of_replicas": 1,
    "analysis": {
      "tokenizer": {
        "first_py_letter": {
          "type": "pinyin",
          "keep_first_letter": true,
          "keep_full_pinyin": false,
          "keep_original": false,
          "keep_none_chinese": true,
          "keep_none_chinese_in_first_letter": false,
          "keep_none_chinese_in_joined_full_pinyin": true,
          "none_chinese_pinyin_tokenize": false,
          "limit_first_letter_length": 16,
          "lowercase": true,
          "trim_whitespace": true
        },
        "full_pinyin_letter": {
          "type": "pinyin",
          "keep_first_letter": false,
          "keep_separate_first_letter": false,
          "keep_full_pinyin": false,
          "keep_joined_full_pinyin": true,
          "keep_original": false,
          "keep_none_chinese": true,
          "keep_none_chinese_in_first_letter": false,
          "keep_none_chinese_in_joined_full_pinyin": true,
          "none_chinese_pinyin_tokenize": false,
          "limit_first_letter_length": 16,
          "lowercase": true
        }
      },
      "analyzer": {
        "first_py_letter_analyzer": {
          "tokenizer": "first_py_letter"
        },
        "full_pinyin_letter_analyzer": {
          "tokenizer": "full_pinyin_letter"
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "weight": {
        "type": "integer"
      },
      "keyword": {
        "properties": {
          "keyword_ik": {
            "type": "text",
            "analyzer": "ik_max_word"
          },
          "keyword_pinyin": {
            "type": "text",
            "analyzer": "full_pinyin_letter_analyzer"
          },
          "keyword_first_py": {
            "type": "text",
            "analyzer": "first_py_letter_analyzer"
          }
        }
      }
    }
  }
}

# Index a sample document; the same text goes into all three "keyword" sub-fields
# so that each one is analyzed by its own analyzer.
POST /pinyin2/_doc
{
  "weight": 2,
  "keyword": {
    "keyword_first_py": "中国人",
    "keyword_pinyin": "中国人",
    "keyword_ik": "中国人"
  }
}



# Inspect the tokens that first_py_letter_analyzer produces for "你好".
POST /pinyin2/_analyze
{
  "analyzer": "first_py_letter_analyzer",
  "text": [
    "你好"
  ]
}

# Inspect the tokens that full_pinyin_letter_analyzer produces for "你好".
POST /pinyin2/_analyze
{
  "analyzer": "full_pinyin_letter_analyzer",
  "text": [
    "你好"
  ]
}

# Inspect the tokens that the ik_max_word analyzer produces for "中国人".
POST /pinyin2/_analyze
{
  "analyzer": "ik_max_word",
  "text": [
    "中国人"
  ]
}



# Prefix search for "中国" over the ik, joined-pinyin and first-letter sub-fields.
# The three match_phrase_prefix clauses sit directly in a single bool/should;
# minimum_should_match is spelled out so a hit must satisfy at least one clause.
# Hits are ordered by "weight" first and by relevance score second.
GET /pinyin2/_search
{
  "query": {
    "bool": {
      "should": [
        {
          "match_phrase_prefix": {
            "keyword.keyword_ik": {
              "query": "中国",
              "boost": 2
            }
          }
        },
        {
          "match_phrase_prefix": {
            "keyword.keyword_pinyin": {
              "query": "中国",
              "boost": 2
            }
          }
        },
        {
          "match_phrase_prefix": {
            "keyword.keyword_first_py": {
              "query": "中国",
              "boost": 1
            }
          }
        }
      ],
      "minimum_should_match": 1
    }
  },
  "sort": [
    {
      "weight": "desc"
    },
    {
      "_score": "desc"
    }
  ]
}

以上配置修改自 https://github.com/medcl/elasticsearch-analysis-pinyin/issues/91

也可以使用 suggester 的前缀匹配来做,这种方案比较灵活。

标签
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!