Elasticsearch: Nested query not returning desired results

有些话、适合烂在心里 提交于 2021-02-05 09:28:16

问题


I am new to Elasticsearch and am having trouble implementing some queries. The following is my index structure:

{
  "cl_1" : {
    "aliases" : { },
    "mappings" : {
      "properties" : {
        "@timestamp" : {
          "type" : "date"
        },
        "@version" : {
          "type" : "long"
        },
        "class" : {
          "type" : "text",
          "fields" : {
            "keyword" : {
              "type" : "keyword",
              "ignore_above" : 256
            }
          }
        },
        "file" : {
          "type" : "text",
          "fields" : {
            "keyword" : {
              "type" : "keyword",
              "ignore_above" : 256
            }
          }
        },
        "level" : {
          "type" : "text",
          "fields" : {
            "keyword" : {
              "type" : "keyword",
              "ignore_above" : 256
            }
          }
        },
        "line_number" : {
          "type" : "text",
          "fields" : {
            "keyword" : {
              "type" : "keyword",
              "ignore_above" : 256
            }
          }
        },
        "logger_name" : {
          "type" : "text",
          "fields" : {
            "keyword" : {
              "type" : "keyword",
              "ignore_above" : 256
            }
          }
        },
        "mdc" : {
          "type" : "object"
        },
        "message" : {
          "properties" : {
            "connectionStats" : {
              "properties" : {
                "activeConnections" : {
                  "type" : "long"
                },
                "idleConnections" : {
                  "type" : "long"
                },
                "maxConnections" : {
                  "type" : "long"
                },
                "minConnections" : {
                  "type" : "long"
                },
                "pendingConnections" : {
                  "type" : "long"
                },
                "totalConnections" : {
                  "type" : "long"
                }
              }
            },
            "dbhistogram" : {
              "type" : "nested",
              "properties" : {
                "histogramName" : {
                  "type" : "text",
                  "fields" : {
                    "keyword" : {
                      "type" : "keyword",
                      "ignore_above" : 256
                    }
                  }
                },
                "snapshot" : {
                  "properties" : {
                    "max" : {
                      "type" : "float"
                    },
                    "mean" : {
                      "type" : "float"
                    },
                    "median" : {
                      "type" : "float"
                    },
                    "min" : {
                      "type" : "float"
                    },
                    "percentile75" : {
                      "type" : "float"
                    },
                    "percentile95" : {
                      "type" : "float"
                    },
                    "percentile98" : {
                      "type" : "float"
                    },
                    "percentile99" : {
                      "type" : "float"
                    },
                    "percentile99_9" : {
                      "type" : "float"
                    },
                    "sd" : {
                      "type" : "float"
                    }
                  }
                },
                "totalCount" : {
                  "type" : "long"
                }
              }
            },
            "healthy" : {
              "type" : "boolean"
            },
            "meters" : {
              "properties" : {
                "meterName" : {
                  "type" : "text",
                  "fields" : {
                    "keyword" : {
                      "type" : "keyword",
                      "ignore_above" : 256
                    }
                  }
                },
                "meterRate" : {
                  "properties" : {
                    "fifteenMinRate" : {
                      "type" : "float"
                    },
                    "fiveMinRate" : {
                      "type" : "float"
                    },
                    "meanRate" : {
                      "type" : "float"
                    },
                    "oneMinRate" : {
                      "type" : "float"
                    }
                  }
                },
                "totalCount" : {
                  "type" : "long"
                }
              }
            },
            "timers" : {
              "properties" : {
                "meterRate" : {
                  "properties" : {
                    "fifteenMinRate" : {
                      "type" : "float"
                    },
                    "fiveMinRate" : {
                      "type" : "float"
                    },
                    "meanRate" : {
                      "type" : "float"
                    },
                    "oneMinRate" : {
                      "type" : "float"
                    }
                  }
                },
                "snapshot" : {
                  "properties" : {
                    "max" : {
                      "type" : "float"
                    },
                    "mean" : {
                      "type" : "float"
                    },
                    "median" : {
                      "type" : "float"
                    },
                    "min" : {
                      "type" : "float"
                    },
                    "percentile75" : {
                      "type" : "float"
                    },
                    "percentile95" : {
                      "type" : "float"
                    },
                    "percentile98" : {
                      "type" : "float"
                    },
                    "percentile99" : {
                      "type" : "float"
                    },
                    "percentile99_9" : {
                      "type" : "float"
                    },
                    "sd" : {
                      "type" : "float"
                    }
                  }
                },
                "timerName" : {
                  "type" : "text",
                  "fields" : {
                    "keyword" : {
                      "type" : "keyword",
                      "ignore_above" : 256
                    }
                  }
                },
                "totalCount" : {
                  "type" : "long"
                }
              }
            }
          }
        },
        "method" : {
          "type" : "text",
          "fields" : {
            "keyword" : {
              "type" : "keyword",
              "ignore_above" : 256
            }
          }
        },
        "source_host" : {
          "type" : "text",
          "fields" : {
            "keyword" : {
              "type" : "keyword",
              "ignore_above" : 256
            }
          }
        },
        "thread_name" : {
          "type" : "text",
          "fields" : {
            "keyword" : {
              "type" : "keyword",
              "ignore_above" : 256
            }
          }
        }
      }
    },
    "settings" : {
      "index" : {
        "creation_date" : "1590152167021",
        "number_of_shards" : "1",
        "number_of_replicas" : "1",
        "uuid" : "Aa-KrQinQyiY_racF-Kuxg",
        "version" : {
          "created" : "7070099"
        },
        "provided_name" : "cl_1"
      }
    }
  }
}

The following is a sample document:

{
  "_index" : "cl_1",
  "_type" : "_doc",
  "_id" : "1545",
  "_version" : 1,
  "_seq_no" : 1544,
  "_primary_term" : 1,
  "found" : true,
  "_source" : {
    "@timestamp" : "2020-05-19T05:18:53.318Z",
    "source_host" : "127.0.0.1",
    "file" : "DBConnectionMetricsTask.java",
    "method" : "run",
    "level" : "INFO",
    "line_number" : "135",
    "thread_name" : "Timer-1",
    "@version" : 1,
    "logger_name" : "DBConnectionMetricsTask",
    "message" : {
      "counters" : [ ],
      "connectionStats" : [
        {
          "pendingConnections" : 0,
          "activeConnections" : 0,
          "idleConnections" : 3,
          "maxConnections" : 1000,
          "minConnections" : 2,
          "totalConnections" : 3
        }
      ],
      "timers" : [
        {
          "timerName" : "controller.pool.Wait",
          "totalCount" : 254246,
          "meterRate" : {
            "fiveMinRate" : 2.475461869555687,
            "fifteenMinRate" : 2.3810523391920393,
            "oneMinRate" : 2.472677418621866,
            "meanRate" : 5.486619709297191
          },
          "snapshot" : {
            "percentile75" : 2344164.0,
            "percentile95" : 3486516.0,
            "percentile98" : 7008980.0,
            "percentile99" : 1.0038282E7,
            "percentile99_9" : 4.8125997E7,
            "min" : 538.0,
            "max" : 4.8125997E7,
            "mean" : 2156193.438475141,
            "median" : 2023267.0,
            "sd" : 3169808.0894894
          }
        }
      ],
      "meters" : [
        {
          "totalCount" : 0,
          "meterName" : "controller.pool.ConnectionTimeoutRate",
          "meterRate" : {
            "fiveMinRate" : 0.0,
            "fifteenMinRate" : 0.0,
            "oneMinRate" : 0.0,
            "meanRate" : 0.0
          }
        }
      ],
      "dbhistogram" : [
        {
          "histogramName" : "controller.pool.ConnectionCreation",
          "totalCount" : 167,
          "snapshot" : {
            "percentile75" : 102.0,
            "percentile95" : 102.0,
            "percentile98" : 102.0,
            "percentile99" : 102.0,
            "percentile99_9" : 131.0,
            "min" : 62.0,
            "max" : 1234.0,
            "mean" : 100.04099508328834,
            "median" : 98.0,
            "sd" : 2.633890647922372
          }
        },
        {
          "histogramName" : "controller.pool.Usage",
          "totalCount" : 254246,
          "snapshot" : {
            "percentile75" : 1.0,
            "percentile95" : 9.0,
            "percentile98" : 36.0,
            "percentile99" : 89.0,
            "percentile99_9" : 178.0,
            "min" : 0.0,
            "max" : 772.0,
            "mean" : 3.5603336398866645,
            "median" : 1.0,
            "sd" : 15.699410900677693
          }
        }
      ],
      "healthy" : true
    },
    "class" : "DBConnectionMetricsTask",
    "mdc" : { }
  }
}

I want to query the data in message.dbhistogram where histogramName is controller.pool.ConnectionCreation. Since dbhistogram is an array of JSON objects, my goal is to retrieve only the object(s) in the array that pass the above filter.

I have tried the following two queries:

Query 1

GET cl_1/_search
{
   "_source": "message.dbhistogram.*", 
  "query": {
    "nested": {
      "path": "message.dbhistogram",
      "query": {
         "bool": {
            "must": [
                {
                  "term": {
                    "message.dbhistogram.histogramName.keyword": {
                      "value": "controller.pool.ConnectionCreation"
                    }
                  }
                }
              ]
            , "must_not": [
              {
                "term": {
                  "message.dbhistogram.histogramName.keyword": {
                    "value": "controller.pool.Usage"
                  }
                }
              }
            ]
         }
        }
      }
    }
  }

Query 2

GET cl_1/_search
{
  "_source": "message.dbhistogram.*", 
  "query": {
    "nested": {
      "path": "message.dbhistogram",
      "query": {
         "bool": {
            "must": [
                {
                  "match": {
                    "message.dbhistogram.histogramName": "controller.pool.ConnectionCreation"
                  }
                }
              ]
            , 
            "must_not": [
              {
                "match": {
                  "message.dbhistogram.histogramName.keyword": "controller.pool.Usage"
                }
              }
            ]
         }
        }
      }
    }
  }

My Expectations

{
   "took": 3,
   "timed_out": false,
   "_shards": {
      "total": 1,
      "successful": 1,
      "skipped": 0,
      "failed": 0
   },
   "hits": {
      "total": {
         "value": 1545,
         "relation": "eq"
      },
      "max_score": 0.6931471,
      "hits": [
         {
            "_index": "cl_1",
            "_type": "_doc",
            "_id": "57",
            "_score": 0.6931471,
            "_source": {
               "message": {
                  "dbhistogram": [
                     {
                        "totalCount": 10,
                        "histogramName": "controller.pool.ConnectionCreation",
                        "snapshot": {
                           "sd": 35.86065,
                           "percentile98": 270,
                           "percentile99": 270,
                           "min": 139,
                           "percentile99_9": 270,
                           "median": 270,
                           "percentile75": 270,
                           "max": 597,
                           "mean": 258.89444,
                           "percentile95": 270
                        }
                     }
                  ]
               }
            }
         }
      ]
   }
}

Reality

{
  "took" : 3,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 1545,
      "relation" : "eq"
    },
    "max_score" : 0.6931471,
    "hits" : [
      {
        "_index" : "cl_1",
        "_type" : "_doc",
        "_id" : "57",
        "_score" : 0.6931471,
        "_source" : {
          "message" : {
            "dbhistogram" : [
              {
                "totalCount" : 10,
                "histogramName" : "controller.pool.ConnectionCreation",
                "snapshot" : {
                  "sd" : 35.86065030723722,
                  "percentile98" : 270.0,
                  "percentile99" : 270.0,
                  "min" : 139.0,
                  "percentile99_9" : 270.0,
                  "median" : 270.0,
                  "percentile75" : 270.0,
                  "max" : 597.0,
                  "mean" : 258.89444519213833,
                  "percentile95" : 270.0
                }
              },
              {
                "totalCount" : 564,
                "histogramName" : "controller.pool.Usage",
                "snapshot" : {
                  "sd" : 51.226141120564726,
                  "percentile98" : 206.0,
                  "percentile99" : 236.0,
                  "min" : 0.0,
                  "percentile99_9" : 361.0,
                  "median" : 12.0,
                  "percentile75" : 22.0,
                  "max" : 56021.0,
                  "mean" : 23.500496339350214,
                  "percentile95" : 105.0
                }
              }
            ]
          }
        }
      }
    ]
  }
}

Please suggest what I'm doing wrong.


回答1:


@jaspreet is right -- although nested sub-documents are treated as separate documents, _source will still retrieve everything under a given path.

So use inner_hits, which returns only the matching nested objects (under each hit's inner_hits section), and notice that we mention them in the _source too:

GET cl_1/_search
{
  "_source": "inner_hits.message.dbhistogram.*",
  "query": {
    "nested": {
      "inner_hits": {},
      "path": "message.dbhistogram",
      "query": {
        "bool": {
          "must": [
            {
              "match": {
                "message.dbhistogram.histogramName": "controller.pool.ConnectionCreation"
              }
            }
          ],
          "must_not": [
            {
              "match": {
                "message.dbhistogram.histogramName.keyword": "controller.pool.Usage"
              }
            }
          ]
        }
      }
    }
  }
}

Or use what are called named inner_hits, in case you'll be using inner_hits multiple times in separate locations:

GET cl_1/_search
{
  "_source": "inner_hits__message_dbhistogram.message.dbhistogram.*",
  "query": {
    "nested": {
      "inner_hits": {
        "name": "inner_hits__message_dbhistogram"
      }, 
      "path": "message.dbhistogram",
      "query": {
        "bool": {
          "must": [
            {
              "match": {
                "message.dbhistogram.histogramName": "controller.pool.ConnectionCreation"
              }
            }
          ],
          "must_not": [
            {
              "match": {
                "message.dbhistogram.histogramName.keyword": "controller.pool.Usage"
              }
            }
          ]
        }
      }
    }
  }
}


来源:https://stackoverflow.com/questions/61960319/elasticsearch-nested-query-not-returning-desired-results

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!