问题
I am new to Elasticsearch and am having trouble implementing some queries. The following is my index structure:
{
"cl_1" : {
"aliases" : { },
"mappings" : {
"properties" : {
"@timestamp" : {
"type" : "date"
},
"@version" : {
"type" : "long"
},
"class" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"file" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"level" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"line_number" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"logger_name" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"mdc" : {
"type" : "object"
},
"message" : {
"properties" : {
"connectionStats" : {
"properties" : {
"activeConnections" : {
"type" : "long"
},
"idleConnections" : {
"type" : "long"
},
"maxConnections" : {
"type" : "long"
},
"minConnections" : {
"type" : "long"
},
"pendingConnections" : {
"type" : "long"
},
"totalConnections" : {
"type" : "long"
}
}
},
"dbhistogram" : {
"type" : "nested",
"properties" : {
"histogramName" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"snapshot" : {
"properties" : {
"max" : {
"type" : "float"
},
"mean" : {
"type" : "float"
},
"median" : {
"type" : "float"
},
"min" : {
"type" : "float"
},
"percentile75" : {
"type" : "float"
},
"percentile95" : {
"type" : "float"
},
"percentile98" : {
"type" : "float"
},
"percentile99" : {
"type" : "float"
},
"percentile99_9" : {
"type" : "float"
},
"sd" : {
"type" : "float"
}
}
},
"totalCount" : {
"type" : "long"
}
}
},
"healthy" : {
"type" : "boolean"
},
"meters" : {
"properties" : {
"meterName" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"meterRate" : {
"properties" : {
"fifteenMinRate" : {
"type" : "float"
},
"fiveMinRate" : {
"type" : "float"
},
"meanRate" : {
"type" : "float"
},
"oneMinRate" : {
"type" : "float"
}
}
},
"totalCount" : {
"type" : "long"
}
}
},
"timers" : {
"properties" : {
"meterRate" : {
"properties" : {
"fifteenMinRate" : {
"type" : "float"
},
"fiveMinRate" : {
"type" : "float"
},
"meanRate" : {
"type" : "float"
},
"oneMinRate" : {
"type" : "float"
}
}
},
"snapshot" : {
"properties" : {
"max" : {
"type" : "float"
},
"mean" : {
"type" : "float"
},
"median" : {
"type" : "float"
},
"min" : {
"type" : "float"
},
"percentile75" : {
"type" : "float"
},
"percentile95" : {
"type" : "float"
},
"percentile98" : {
"type" : "float"
},
"percentile99" : {
"type" : "float"
},
"percentile99_9" : {
"type" : "float"
},
"sd" : {
"type" : "float"
}
}
},
"timerName" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"totalCount" : {
"type" : "long"
}
}
}
}
},
"method" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"source_host" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"thread_name" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"settings" : {
"index" : {
"creation_date" : "1590152167021",
"number_of_shards" : "1",
"number_of_replicas" : "1",
"uuid" : "Aa-KrQinQyiY_racF-Kuxg",
"version" : {
"created" : "7070099"
},
"provided_name" : "cl_1"
}
}
}
}
The following is a sample document:
{
"_index" : "cl_1",
"_type" : "_doc",
"_id" : "1545",
"_version" : 1,
"_seq_no" : 1544,
"_primary_term" : 1,
"found" : true,
"_source" : {
"@timestamp" : "2020-05-19T05:18:53.318Z",
"source_host" : "127.0.0.1",
"file" : "DBConnectionMetricsTask.java",
"method" : "run",
"level" : "INFO",
"line_number" : "135",
"thread_name" : "Timer-1",
"@version" : 1,
"logger_name" : "DBConnectionMetricsTask",
"message" : {
"counters" : [ ],
"connectionStats" : [
{
"pendingConnections" : 0,
"activeConnections" : 0,
"idleConnections" : 3,
"maxConnections" : 1000,
"minConnections" : 2,
"totalConnections" : 3
}
],
"timers" : [
{
"timerName" : "controller.pool.Wait",
"totalCount" : 254246,
"meterRate" : {
"fiveMinRate" : 2.475461869555687,
"fifteenMinRate" : 2.3810523391920393,
"oneMinRate" : 2.472677418621866,
"meanRate" : 5.486619709297191
},
"snapshot" : {
"percentile75" : 2344164.0,
"percentile95" : 3486516.0,
"percentile98" : 7008980.0,
"percentile99" : 1.0038282E7,
"percentile99_9" : 4.8125997E7,
"min" : 538.0,
"max" : 4.8125997E7,
"mean" : 2156193.438475141,
"median" : 2023267.0,
"sd" : 3169808.0894894
}
}
],
"meters" : [
{
"totalCount" : 0,
"meterName" : "controller.pool.ConnectionTimeoutRate",
"meterRate" : {
"fiveMinRate" : 0.0,
"fifteenMinRate" : 0.0,
"oneMinRate" : 0.0,
"meanRate" : 0.0
}
}
],
"dbhistogram" : [
{
"histogramName" : "controller.pool.ConnectionCreation",
"totalCount" : 167,
"snapshot" : {
"percentile75" : 102.0,
"percentile95" : 102.0,
"percentile98" : 102.0,
"percentile99" : 102.0,
"percentile99_9" : 131.0,
"min" : 62.0,
"max" : 1234.0,
"mean" : 100.04099508328834,
"median" : 98.0,
"sd" : 2.633890647922372
}
},
{
"histogramName" : "controller.pool.Usage",
"totalCount" : 254246,
"snapshot" : {
"percentile75" : 1.0,
"percentile95" : 9.0,
"percentile98" : 36.0,
"percentile99" : 89.0,
"percentile99_9" : 178.0,
"min" : 0.0,
"max" : 772.0,
"mean" : 3.5603336398866645,
"median" : 1.0,
"sd" : 15.699410900677693
}
}
],
"healthy" : true
},
"class" : "DBConnectionMetricsTask",
"mdc" : { }
}
}
I want to query the data in message.dbhistogram where histogramName is controller.pool.ConnectionCreation. Since dbhistogram is an array of JSON objects, my goal is to retrieve only the object from the array that passes the above filter.
I have tried the following two queries:
Query 1
GET cl_1/_search
{
"_source": "message.dbhistogram.*",
"query": {
"nested": {
"path": "message.dbhistogram",
"query": {
"bool": {
"must": [
{
"term": {
"message.dbhistogram.histogramName.keyword": {
"value": "controller.pool.ConnectionCreation"
}
}
}
]
, "must_not": [
{
"term": {
"message.dbhistogram.histogramName.keyword": {
"value": "controller.pool.Usage"
}
}
}
]
}
}
}
}
}
Query 2
GET cl_1/_search
{
"_source": "message.dbhistogram.*",
"query": {
"nested": {
"path": "message.dbhistogram",
"query": {
"bool": {
"must": [
{
"match": {
"message.dbhistogram.histogramName": "controller.pool.ConnectionCreation"
}
}
]
,
"must_not": [
{
"match": {
"message.dbhistogram.histogramName.keyword": "controller.pool.Usage"
}
}
]
}
}
}
}
}
My Expectations
{
"took": 3,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 1545,
"relation": "eq"
},
"max_score": 0.6931471,
"hits": [
{
"_index": "cl_1",
"_type": "_doc",
"_id": "57",
"_score": 0.6931471,
"_source": {
"message": {
"dbhistogram": [
{
"totalCount": 10,
"histogramName": "controller.pool.ConnectionCreation",
"snapshot": {
"sd": 35.86065,
"percentile98": 270,
"percentile99": 270,
"min": 139,
"percentile99_9": 270,
"median": 270,
"percentile75": 270,
"max": 597,
"mean": 258.89444,
"percentile95": 270
}
}
]
}
}
}
]
}
}
Reality
{
"took" : 3,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 1545,
"relation" : "eq"
},
"max_score" : 0.6931471,
"hits" : [
{
"_index" : "cl_1",
"_type" : "_doc",
"_id" : "57",
"_score" : 0.6931471,
"_source" : {
"message" : {
"dbhistogram" : [
{
"totalCount" : 10,
"histogramName" : "controller.pool.ConnectionCreation",
"snapshot" : {
"sd" : 35.86065030723722,
"percentile98" : 270.0,
"percentile99" : 270.0,
"min" : 139.0,
"percentile99_9" : 270.0,
"median" : 270.0,
"percentile75" : 270.0,
"max" : 597.0,
"mean" : 258.89444519213833,
"percentile95" : 270.0
}
},
{
"totalCount" : 564,
"histogramName" : "controller.pool.Usage",
"snapshot" : {
"sd" : 51.226141120564726,
"percentile98" : 206.0,
"percentile99" : 236.0,
"min" : 0.0,
"percentile99_9" : 361.0,
"median" : 12.0,
"percentile75" : 22.0,
"max" : 56021.0,
"mean" : 23.500496339350214,
"percentile95" : 105.0
}
}
]
}
}
}
]
}
}
Please suggest what I am doing wrong.
回答1:
@jaspreet is right -- although nested sub-documents are treated as separate documents, _source will still retrieve everything under a given path.
So use inner_hits, which returns only the matching nested objects in a separate inner_hits section of each hit, and notice we mention them in the _source too:
GET cl_1/_search
{
"_source": "inner_hits.message.dbhistogram.*",
"query": {
"nested": {
"inner_hits": {},
"path": "message.dbhistogram",
"query": {
"bool": {
"must": [
{
"match": {
"message.dbhistogram.histogramName": "controller.pool.ConnectionCreation"
}
}
],
"must_not": [
{
"match": {
"message.dbhistogram.histogramName.keyword": "controller.pool.Usage"
}
}
]
}
}
}
}
}
Or use what are called named inner_hits, in case you'll be using inner_hits multiple times in separate locations:
GET cl_1/_search
{
"_source": "inner_hits__message_dbhistogram.message.dbhistogram.*",
"query": {
"nested": {
"inner_hits": {
"name": "inner_hits__message_dbhistogram"
},
"path": "message.dbhistogram",
"query": {
"bool": {
"must": [
{
"match": {
"message.dbhistogram.histogramName": "controller.pool.ConnectionCreation"
}
}
],
"must_not": [
{
"match": {
"message.dbhistogram.histogramName.keyword": "controller.pool.Usage"
}
}
]
}
}
}
}
}
来源:https://stackoverflow.com/questions/61960319/elasticsearch-nested-query-not-returning-desired-results