问题
I need to be able to extract these fields from this file that has many json entries:
sender: Hostname
mem:used_p
cpu: user_p
load: load_5
//
cat tmp.txt
{"senderDateTimeStamp":"2016-04-07T00:00:00.0093","senderHost":"server1","senderAppcode":"test_infrastats_prod","senderUsecase":"system","destinationTopic":"test_serverstats_realtimedata_topic_prod","correlatedRecord":false,"needCorrelationCacheCleanup":false,"needCorrelation":false,"correlationAttributes":null,"correlationRecordCount":0,"correlateTimeWindowInMills":0,"lastCorrelationRecord":false,"realtimeESStorage":true,"receiverDateTimeStamp":1460001606277,"payloadData":{"timestamp":"2016-04-07T00:00:00.093","sender":{"name":"server1","hostname":"server1"},"count":"1","shipper":"server1","mem":{"total":"18855256064","free":"7273639936","actual_used":"3755769856","used_p":"0.6242380717975277","actual_free":"15099486208","used":"11581616128","actual_used_p":"0.2091895439262065"},"cpu":{"steal":"0","idle":"5102727720","system":"16658360","softirq":"13824070","irq":"1659250","system_p":"0.012666049012784248","nice":"32210","iowait":"660220","user_p":"0.18809078763071663","user":"1112770410"},"load":{"load1":"1.54","load15":"1.11","load5":"1.2"},"swap":{"total":"18855256064","free":"1044598784","actual_used":"0","used_p":"0.0","actual_free":"0","used":"11581616128"},"type":"system"},"payloadDataText":null,"key":"test_infrastats_prod:system","destinationTopicName":"test_serverstats_realtimedata_topic_prod","hdfsPath":"test_infrastats_prod/system","esindex":"test_infrastats_prod","estype":"system","appCode":"test_infrastats_prod","useCase":"system"}
{"senderDateTimeStamp":"2016-04-07T00:00:00.0093","senderHost":"server1","senderAppcode":"test_infrastats_prod","senderUsecase":"system","destinationTopic":"test_serverstats_realtimedata_topic_prod","correlatedRecord":false,"needCorrelationCacheCleanup":false,"needCorrelation":false,"correlationAttributes":null,"correlationRecordCount":0,"correlateTimeWindowInMills":0,"lastCorrelationRecord":false,"realtimeESStorage":true,"receiverDateTimeStamp":1460001606277,"payloadData":{"timestamp":"2016-04-07T00:00:00.093","sender":{"name":"server1","hostname":"server1"},"count":"1","shipper":"server1","mem":{"total":"18855256064","free":"7273639936","actual_used":"3755769856","used_p":"0.6242380717975277","actual_free":"15099486208","used":"11581616128","actual_used_p":"0.2091895439262065"},"cpu":{"steal":"0","idle":"5102727720","system":"16658360","softirq":"13824070","irq":"1659250","system_p":"0.012666049012784248","nice":"32210","iowait":"660220","user_p":"0.18809078763071663","user":"1112770410"},"load":{"load1":"1.54","load15":"1.11","load5":"1.2"},"swap":{"total":"18855256064","free":"1044598784","actual_used":"0","used_p":"0.0","actual_free":"0","used":"11581616128"},"type":"system"},"payloadDataText":null,"key":"test_infrastats_prod:system","destinationTopicName":"test_serverstats_realtimedata_topic_prod","hdfsPath":"test_infrastats_prod/system","esindex":"test_infrastats_prod","estype":"system","appCode":"test_infrastats_prod","useCase":"system"}
{"senderDateTimeStamp":"2016-04-07T00:00:00.0093","senderHost":"server1","senderAppcode":"test_infrastats_prod","senderUsecase":"system","destinationTopic":"test_serverstats_realtimedata_topic_prod","correlatedRecord":false,"needCorrelationCacheCleanup":false,"needCorrelation":false,"correlationAttributes":null,"correlationRecordCount":0,"correlateTimeWindowInMills":0,"lastCorrelationRecord":false,"realtimeESStorage":true,"receiverDateTimeStamp":1460001606277,"payloadData":{"timestamp":"2016-04-07T00:00:00.093","sender":{"name":"server1","hostname":"server1"},"count":"1","shipper":"server1","mem":{"total":"18855256064","free":"7273639936","actual_used":"3755769856","used_p":"0.6242380717975277","actual_free":"15099486208","used":"11581616128","actual_used_p":"0.2091895439262065"},"cpu":{"steal":"0","idle":"5102727720","system":"16658360","softirq":"13824070","irq":"1659250","system_p":"0.012666049012784248","nice":"32210","iowait":"660220","user_p":"0.18809078763071663","user":"1112770410"},"load":{"load1":"1.54","load15":"1.11","load5":"1.2"},"swap":{"total":"18855256064","free":"1044598784","actual_used":"0","used_p":"0.0","actual_free":"0","used":"11581616128"},"type":"system"},"payloadDataText":null,"key":"test_infrastats_prod:system","destinationTopicName":"test_serverstats_realtimedata_topic_prod","hdfsPath":"test_infrastats_prod/system","esindex":"test_infrastats_prod","estype":"system","appCode":"test_infrastats_prod","useCase":"system"}
I can extract data within payloadData section like this:
df <- jsonlite::fromJSON(paste0("[",paste0(readLines("c:/tmp.txt"),collapse=","),"]"))$payloadData[c("timestamp","count")]
but there are nested objects within payloadData section, how would I extract the data in nested section in json data, from the mem, cpu, load section?
回答1:
One way is to use tidyjson
:
library(tidyjson)
library(magrittr)
json <- '{"senderDateTimeStamp":"2016-04-07T00:00:00.0093","senderHost":"server1","senderAppcode":"test_infrastats_prod","senderUsecase":"system","destinationTopic":"test_serverstats_realtimedata_topic_prod","correlatedRecord":false,"needCorrelationCacheCleanup":false,"needCorrelation":false,"correlationAttributes":null,"correlationRecordCount":0,"correlateTimeWindowInMills":0,"lastCorrelationRecord":false,"realtimeESStorage":true,"receiverDateTimeStamp":1460001606277,"payloadData":{"timestamp":"2016-04-07T00:00:00.093","sender":{"name":"server1","hostname":"server1"},"count":"1","shipper":"server1","mem":{"total":"18855256064","free":"7273639936","actual_used":"3755769856","used_p":"0.6242380717975277","actual_free":"15099486208","used":"11581616128","actual_used_p":"0.2091895439262065"},"cpu":{"steal":"0","idle":"5102727720","system":"16658360","softirq":"13824070","irq":"1659250","system_p":"0.012666049012784248","nice":"32210","iowait":"660220","user_p":"0.18809078763071663","user":"1112770410"},"load":{"load1":"1.54","load15":"1.11","load5":"1.2"},"swap":{"total":"18855256064","free":"1044598784","actual_used":"0","used_p":"0.0","actual_free":"0","used":"11581616128"},"type":"system"},"payloadDataText":null,"key":"test_infrastats_prod:system","destinationTopicName":"test_serverstats_realtimedata_topic_prod","hdfsPath":"test_infrastats_prod/system","esindex":"test_infrastats_prod","estype":"system","appCode":"test_infrastats_prod","useCase":"system"}'
json %>%
enter_object("payloadData") %>%
spread_values(send_host = jstring("sender", "hostname"),
mem_used_p = jstring("mem", "used_p"),
cpu_user_p = jstring("cpu", "user_p"),
load_load_5 = jstring("load","load5"))
# document.id send_host mem_used_p cpu_user_p load_load_5
# 1 1 server1 0.6242380717975277 0.18809078763071663 1.2
Alternatively, you could stack each set of keys:
payload <- json %>% enter_object("payloadData")
sender_keys <- payload %>%
enter_object("sender") %>%
gather_keys() %>%
append_values_string()
mem_keys <- payload %>%
enter_object("mem") %>%
gather_keys() %>%
append_values_string()
cpu_keys <- payload %>%
enter_object("cpu") %>%
gather_keys() %>%
append_values_string()
load_keys <- payload %>%
enter_object("load") %>%
gather_keys() %>%
append_values_string()
来源:https://stackoverflow.com/questions/36629362/how-do-you-extract-data-from-nested-json-data-in-r