I have what feels like a simple aggregation. I have a document that records timing for code, like so:
{
"task_start": "2020-06-03T21:19:07.908821Z",
"task_end
This one's tricky -- already discussed here.
I think you'll have to resort to a bit of `scripted_metric` scripting and some method mocking, because the exposed Painless API is somewhat limited:
{
  "size": 0,
  "aggs": {
    "task_metrics_median": {
      "scripted_metric": {
        "init_script": "state.ratios = new ArrayList();",
        "map_script": """
          // Runs once per matching document: collects the ratio
          // (sub-task value / total task duration in millis) for every
          // sub-task whose key is 'sub-task1-time-milliseconds'.

          // access the source incl. the nested subtasks
          def d = params._source;
          def subtasks = d.sub_tasks;
          // documents without any sub_tasks contribute nothing
          if (subtasks != null) {
            for (def subtask : subtasks) {
              // mimicking a `term` query: skip non-matching sub-tasks but keep
              // scanning, since the wanted key is not necessarily the first entry
              if (subtask.key != 'sub-task1-time-milliseconds') continue;
              // incoming as strings so parse
              def millis_end = ZonedDateTime.parse(d.task_end).toInstant().toEpochMilli();
              def millis_start = ZonedDateTime.parse(d.task_start).toInstant().toEpochMilli();
              double task_time = (millis_end - millis_start);
              // prevent zero division; the duration is per document, so no
              // other sub-task of this document can yield a ratio either
              if (task_time <= 0) break;
              state['ratios'].add(subtask.value / task_time);
            }
          }
        """,
        "combine_script": """
          // Hand the raw per-shard ratios to the reduce phase. A median of
          // per-shard medians is not the overall median, so the calculation
          // has to happen once, over the merged values.
          return state.ratios;
        """,
        "reduce_script": """
          // Merge the ratio lists from all shards.
          // NOTE: every collected ratio is held in memory on the coordinating node.
          def merged = new ArrayList();
          for (def shard_ratios : states) {
            if (shard_ratios != null) {
              merged.addAll(shard_ratios);
            }
          }
          // no matching documents at all -> no median
          if (merged.isEmpty()) {
            return null;
          }
          Collections.sort(merged);
          // trivial median calc
          int n = merged.size();
          int mid = n / 2;
          double median;
          if (n % 2 == 0) {
            median = ((double) merged[mid] + (double) merged[mid - 1]) / 2;
          } else {
            median = (double) merged[mid];
          }
          return median;
        """
      }
    }
  }
}