问题
If R is not suitable for this job then fair enough, but I believe it should be.
I am calling an API and then dumping the results into Postman's JSON reader, which gives me results like:
"results": [
{
"personUuid": "***",
"synopsis": {
"fullName": "***",
"headline": "***",
"location": "***",
"image": "***",
"skills": [
"*",
"*",
"*",
"*.",
"*"
],
"phoneNumbers": [
"***",
"***"
],
"emailAddresses": [
"***"
],
"networks": [
{
"name": "linkedin",
"url": "***",
"type": "canonicalUrl",
"lastAccessed": null
},
{
"name": "***",
"url": "***",
"type": "cvUrl",
"lastAccessed": "*"
},
{
"name": "*",
"url": "***",
"type": "cvUrl",
"lastAccessed": "*"
}
]
}
},
{
Firstly, I'm not sure how to import this into R, as I've mainly dealt with CSVs. I've seen other questions where people use JSON packages to call the URL directly, but that's not going to work with what I'm doing, so I'd like to know how to read a CSV file with JSON in it.
I used:
x <- fromJSON(file="Z:/json.csv")
But perhaps there's a better way. Once this is done, the JSON looks more like:
...$results[[9]]$synopsis$emailAddresses
[1] "***" "***"
[3] "***" "***"
$results[[9]]$synopsis$networks...
Then, for each result, I would like to store the headline and the email address in a data table.
I tried:
str_extract_all(x, 'emailAddresses*$')
I figured * would represent everything between emailAddresses and the $, including newlines etc., but this doesn't work. I also find that with extract, when you do get * to work, it doesn't extract what * represents.
eg:
> y <- 'some text. email "oli@oli.o" other text'
> y
[1] "some text. email \"oli@oli.o\" other text"
> str_extract_all(y, 'email \"*"')
[[1]]
[1] "email \""
PART 2:
The answers below worked; however, if I call the API directly:
body ='{"start": 0,"count": 105,...}'
x <- POST(url="https://live.*.me/api/v3/person", body=body, add_headers(Accept="application/json", 'Content-Type'="application/json", Authorization = "id=*, apiKey=*"))
y <- content(x)
Then using
fromJSON(y, flatten=TRUE)$results[c("synopsis.headline",
"synopsis.emailAddresses")]
does not work. I tried the following:
z <- NULL
zz <- NULL
for(i in 1:y$count){
z=rbind(z,data.table(job = y$results[[i]]$synopsis$headline))
}
for(i in 1:y$count){
zz=rbind(zz,data.table(job = y$results[[i]]$synopsis$emailAddresses))
}
df <- cbind(z,zz)
However, when the JSON list is returned, some people have multiple emails, so the method above only records the first email for each person. How would I save the multiple emails as a vector (rather than having multiple columns)?
回答1:
UPDATE 1: To read the JSON from a URL, you can simply use the fromJSON function, passing it a string containing the URL of your JSON data:
library(jsonlite)
url <- 'http://you.url.com/data.json'
# in this case we pass a URL to the fromJSON function instead of the actual content we want to parse
fromJSON(url, flatten=TRUE)$results[c("synopsis.headline", "synopsis.emailAddresses")]
// end UPDATE 1
You could also pass the flatten parameter to fromJSON and then use the 'results' data frame.
fromJSON(json.data, flatten=TRUE)$results[c("synopsis.headline",
"synopsis.emailAddresses")]
synopsis.headline synopsis.emailAddresses
1 *** jane.doe@boo.com
2 *** john.doe@foo.com
Here is how I defined json.data; please note that I intentionally added one more record to your sample input JSON.
json.data <- '{
"results":[
{
"personUuid":"***",
"synopsis":{
"fullName":"***",
"headline":"***",
"location":"***",
"image":"***",
"skills":[
"*",
"*",
"*",
"*.",
"*"
],
"phoneNumbers":[
"***",
"***"
],
"emailAddresses":[
"jane.doe@boo.com"
],
"networks":[
{
"name":"linkedin",
"url":"***",
"type":"canonicalUrl",
"lastAccessed":null
},
{
"name":"***",
"url":"***",
"type":"cvUrl",
"lastAccessed":"*"
},
{
"name":"*",
"url":"***",
"type":"cvUrl",
"lastAccessed":"*"
}
]
}
},
{
"personUuid":"***",
"synopsis":{
"fullName":"***",
"headline":"***",
"location":"***",
"image":"***",
"skills":[
"*",
"*",
"*",
"*.",
"*"
],
"phoneNumbers":[
"***",
"***"
],
"emailAddresses":[
"john.doe@foo.com"
],
"networks":[
{
"name":"linkedin",
"url":"***",
"type":"canonicalUrl",
"lastAccessed":null
},
{
"name":"***",
"url":"***",
"type":"cvUrl",
"lastAccessed":"*"
},
{
"name":"*",
"url":"***",
"type":"cvUrl",
"lastAccessed":"*"
}
]
}
}
]
}'
回答2:
Additional test data might be helpful.
Consider:
library(jsonlite)
library(dplyr)
json_data = "{\"results\": [\n {\n\"personUuid\": \"***\",\n\"synopsis\": {\n\"fullName\": \"***\",\n\"headline\": \"***\",\n\"location\": \"***\",\n\"image\": \"***\",\n\"skills\": [\n\"*\",\n\"*\",\n\"*\",\n\"*.\",\n\"*\"\n],\n\"phoneNumbers\": [\n\"***\",\n\"***\"\n],\n\"emailAddresses\": [\n\"***\"\n],\n\"networks\": [\n{\n \"name\": \"linkedin\",\n \"url\": \"***\",\n \"type\": \"canonicalUrl\",\n \"lastAccessed\": null\n},\n {\n \"name\": \"***\",\n \"url\": \"***\",\n \"type\": \"cvUrl\",\n \"lastAccessed\": \"*\"\n },\n {\n \"name\": \"*\",\n \"url\": \"***\",\n \"type\": \"cvUrl\",\n \"lastAccessed\": \"*\"\n }\n ]\n}\n}]}"
(df <- jsonlite::fromJSON(json_data, simplifyDataFrame = TRUE, flatten = TRUE))
#> $results
#> personUuid synopsis.fullName synopsis.headline synopsis.location
#> 1 *** *** *** ***
#> synopsis.image synopsis.skills synopsis.phoneNumbers
#> 1 *** *, *, *, *., * ***, ***
#> synopsis.emailAddresses
#> 1 ***
#> synopsis.networks
#> 1 linkedin, ***, *, ***, ***, ***, canonicalUrl, cvUrl, cvUrl, NA, *, *
df$results %>%
select(headline = synopsis.headline, emails = synopsis.emailAddresses)
#> headline emails
#> 1 *** ***
来源:https://stackoverflow.com/questions/40045080/r-read-and-parse-json