fromJSON only reads first line in R - json
I've got a sample JSON file with about 500 tweets which I'd like to get into a dataframe.
The first three tweets from the JSON file are as follows (urls have been changed deliberately to fit within stackoverflow rules on links):
{"id":"tag:search.twitter.com,2005:413500801899044864","objectType":"activity","actor":{"objectType":"person","id":"id:twitter.com:860787127","link":"httpee://www.twitter.com/JoeGoodman11","displayName":"Joe Goodman","postedTime":"2012-10-04T03:18:54.000Z","image":"httpes://pbs.twimg.com/profile_images/3781305408/372be07ac2b312d35e1426b264891c4f_normal.jpeg","summary":null,"links":[{"href":null,"rel":"me"}],"friendsCount":21,"followersCount":18,"listedCount":0,"statusesCount":177,"twitterTimeZone":null,"verified":false,"utcOffset":null,"preferredUsername":"JoeGoodman11","languages":["en"],"favoritesCount":286},"verb":"post","postedTime":"2013-12-19T02:47:28.000Z","generator":{"displayName":"Twitter for Android","link":"httpee://twitter.com/download/android"},"provider":{"objectType":"service","displayName":"Twitter","link":"httpee://www.twitter.com"},"link":"httpee://twitter.com/JoeGoodman11/statuses/413500801899044864","body":"Hard at work studying for finals httpee://t.co/0EumsvUCuI","object":{"objectType":"note","id":"object:search.twitter.com,2005:413500801899044864","summary":"Hard at work studying for finals httpee://t.co/0EumsvUCuI","link":"httpee://twitter.com/JoeGoodman11/statuses/413500801899044864","postedTime":"2013-12-19T02:47:28.000Z"},"favoritesCount":0,"location":{"objectType":"place","displayName":"Lowell, MA","name":"Lowell","country_code":"United 
States","twitter_country_code":"US","link":"httpes://api.twitter.com/1.1/geo/id/d6539f049c4d05e8.json","geo":{"type":"Polygon","coordinates":[[[-71.382491,42.607189],[-71.382491,42.66676],[-71.271231,42.66676],[-71.271231,42.607189]]]}},"geo":{"type":"Point","coordinates":[42.6428357,-71.33654]},"twitter_entities":{"hashtags":[],"symbols":[],"urls":[],"user_mentions":[],"media":[{"id":413500801395736576,"id_str":"413500801395736576","indices":[33,55],"media_url":"httpee://pbs.twimg.com/media/Bb0Myb2IQAAaexg.jpg","media_url_https":"httpes://pbs.twimg.com/media/Bb0Myb2IQAAaexg.jpg","url":"httpee://t.co/0EumsvUCuI","display_url":"pic.twitter.com/0EumsvUCuI","expanded_url":"httpee://twitter.com/JoeGoodman11/status/413500801899044864/photo/1","type":"photo","sizes":{"medium":{"w":600,"h":339,"resize":"fit"},"thumb":{"w":150,"h":150,"resize":"crop"},"small":{"w":340,"h":192,"resize":"fit"},"large":{"w":1023,"h":579,"resize":"fit"}}}]},"twitter_filter_level":"medium","twitter_lang":"en","retweetCount":0,"gnip":{"urls":[{"url":"httpee://t.co/0EumsvUCuI","expanded_url":"httpee://twitter.com/JoeGoodman11/status/413500801899044864/photo/1","expanded_status":200}],"language":{"value":"en"}}}
{"id":"tag:search.twitter.com,2005:413500803593547776","objectType":"activity","actor":{"objectType":"person","id":"id:twitter.com:168228121","link":"httpee://www.twitter.com/rvzigvdhiv","displayName":"Razi الرازي Gadhia","postedTime":"2010-07-18T19:28:45.000Z","image":"httpes://pbs.twimg.com/profile_images/412269827399495680/44JZWZPz_normal.jpeg","summary":"Why so serious? \n#2005spellingbeechamp \n#wood","links":[{"href":null,"rel":"me"}],"friendsCount":196,"followersCount":300,"listedCount":0,"statusesCount":4236,"twitterTimeZone":"Eastern Time (US & Canada)","verified":false,"utcOffset":"-18000","preferredUsername":"rvzigvdhiv","languages":["en"],"location":{"objectType":"place","displayName":"ATL"},"favoritesCount":4316},"verb":"post","postedTime":"2013-12-19T02:47:28.000Z","generator":{"displayName":"Twitter for iPhone","link":"http://twitter.com/download/iphone"},"provider":{"objectType":"service","displayName":"Twitter","link":"httpee://www.twitter.com"},"link":"httpee://twitter.com/rvzigvdhiv/statuses/413500803593547776","body":"#thellymon haha aight homie I'll let you know","object":{"objectType":"note","id":"object:search.twitter.com,2005:413500803593547776","summary":"#thellymon haha aight homie I'll let you know","link":"httpee://twitter.com/rvzigvdhiv/statuses/413500803593547776","postedTime":"2013-12-19T02:47:28.000Z"},"inReplyTo":{"link":"httpee://twitter.com/thellymon/statuses/413500370695229441"},"favoritesCount":0,"twitter_entities":{"hashtags":[],"symbols":[],"urls":[],"user_mentions":[{"screen_name":"thellymon","name":"","id":920010534,"id_str":"920010534","indices":[0,10]}]},"twitter_filter_level":"medium","twitter_lang":"en","retweetCount":0,"gnip":{"language":{"value":"en"},"profileLocations":[{"objectType":"place","geo":{"type":"point","coordinates":[-84.38798,33.749]},"address":{"country":"United States","countryCode":"US","locality":"Atlanta","region":"Georgia","subRegion":"Fulton County"},"displayName":"Atlanta, Georgia, United 
States"}]}}
{"id":"tag:search.twitter.com,2005:413500803597758464","objectType":"activity","actor":{"objectType":"person","id":"id:twitter.com:394373858","link":"httpee://www.twitter.com/Carly_Horse12","displayName":"Carly Sawyer","postedTime":"2011-10-19T23:56:56.000Z","image":"httpes://pbs.twimg.com/profile_images/378800000497869250/84266ccaf047be0cfbd8aeb73fe88544_normal.jpeg","summary":"Lindy Hopper. Theatre geek. Biology nerd. Christ follower. Creation lover. Dream chaser.","links":[{"href":null,"rel":"me"}],"friendsCount":398,"followersCount":197,"listedCount":1,"statusesCount":3220,"twitterTimeZone":"Quito","verified":false,"utcOffset":"-18000","preferredUsername":"Carly_Horse12","languages":["en"],"location":{"objectType":"place","displayName":"Charlottesville, VA"},"favoritesCount":662},"verb":"post","postedTime":"2013-12-19T02:47:28.000Z","generator":{"displayName":"Twitter for iPhone","link":"httpee://twitter.com/download/iphone"},"provider":{"objectType":"service","displayName":"Twitter","link":"httpee://www.twitter.com"},"link":"httpee://twitter.com/Carly_Horse12/statuses/413500803597758464","body":"And this concludes the yearly screening of \"It's A Wonder Life\" in it's usual fashion with Mom and me in shambles #tears","object":{"objectType":"note","id":"object:search.twitter.com,2005:413500803597758464","summary":"And this concludes the yearly screening of \"It's A Wonder Life\" in it's usual fashion with Mom and me in shambles #tears","link":"httpee://twitter.com/Carly_Horse12/statuses/413500803597758464","postedTime":"2013-12-19T02:47:28.000Z"},"favoritesCount":0,"twitter_entities":{"hashtags":[{"text":"tears","indices":[114,120]}],"symbols":[],"urls":[],"user_mentions":[]},"twitter_filter_level":"medium","twitter_lang":"en","retweetCount":0,"gnip":{"language":{"value":"en"},"profileLocations":[{"objectType":"place","geo":{"type":"point","coordinates":[-78.47668,38.02931]},"address":{"country":"United 
States","countryCode":"US","locality":"Charlottesville","region":"Virginia","subRegion":"City of Charlottesville"},"displayName":"Charlottesville, Virginia, United States"}]}}
I'm using the following R script:
library(rjson)
library(RCurl)
library(plyr)
raw_data<-('*filepath*/JSON test.json')
data<-fromJSON(paste(readLines(raw_data),collapse=""))
data
tweets<-data$body
tweets
which produces the following result - I only get the data for the first tweet
data<-fromJSON(paste(readLines(raw_data),collapse=""))
data
$id
[1] "tag:search.twitter.com,2005:413500801899044864"
$objectType
[1] "activity"
$actor
$actor$objectType
[1] "person"
$actor$id
[1] "id:twitter.com:860787127"
$actor$link
[1] "httpee://www.twitter.com/JoeGoodman11"
$actor$displayName
[1] "Joe Goodman"
$actor$postedTime
[1] "2012-10-04T03:18:54.000Z"
$actor$image
[1] "httpes://pbs.twimg.com/profile_images/3781305408/372be07ac2b312d35e1426b264891c4f_normal.jpeg"
$actor$summary
NULL
$actor$links
$actor$links[[1]]
$actor$links[[1]]$href
NULL
$actor$links[[1]]$rel
[1] "me"
$actor$friendsCount
[1] 21
$actor$followersCount
[1] 18
$actor$listedCount
[1] 0
$actor$statusesCount
[1] 177
$actor$twitterTimeZone
NULL
$actor$verified
[1] FALSE
$actor$utcOffset
NULL
$actor$preferredUsername
[1] "JoeGoodman11"
$actor$languages
[1] "en"
$actor$favoritesCount
[1] 286
$verb
[1] "post"
$postedTime
[1] "2013-12-19T02:47:28.000Z"
$generator
$generator$displayName
[1] "Twitter for Android"
$generator$link
[1] "httpee://twitter.com/download/android"
$provider
$provider$objectType
[1] "service"
$provider$displayName
[1] "Twitter"
$provider$link
[1] "httpee://www.twitter.com"
$link
[1] "httpee://twitter.com/JoeGoodman11/statuses/413500801899044864"
$body
[1] "Hard at work studying for finals http://t.co/0EumsvUCuI"
$object
$object$objectType
[1] "note"
$object$id
[1] "object:search.twitter.com,2005:413500801899044864"
$object$summary
[1] "Hard at work studying for finals http://t.co/0EumsvUCuI"
$object$link
[1] "httpee://twitter.com/JoeGoodman11/statuses/413500801899044864"
$object$postedTime
[1] "2013-12-19T02:47:28.000Z"
$favoritesCount
[1] 0
$location
$location$objectType
[1] "place"
$location$displayName
[1] "Lowell, MA"
$location$name
[1] "Lowell"
$location$country_code
[1] "United States"
$location$twitter_country_code
[1] "US"
$location$link
[1] "httpes://api.twitter.com/1.1/geo/id/d6539f049c4d05e8.json"
$location$geo
$location$geo$type
[1] "Polygon"
$location$geo$coordinates
$location$geo$coordinates[[1]]
$location$geo$coordinates[[1]][[1]]
[1] -71.38249 42.60719
$location$geo$coordinates[[1]][[2]]
[1] -71.38249 42.66676
$location$geo$coordinates[[1]][[3]]
[1] -71.27123 42.66676
$location$geo$coordinates[[1]][[4]]
[1] -71.27123 42.60719
$geo
$geo$type
[1] "Point"
$geo$coordinates
[1] 42.64284 -71.33654
$twitter_entities
$twitter_entities$hashtags
list()
$twitter_entities$symbols
list()
$twitter_entities$urls
list()
$twitter_entities$user_mentions
list()
$twitter_entities$media
$twitter_entities$media[[1]]
$twitter_entities$media[[1]]$id
[1] 4.135008e+17
$twitter_entities$media[[1]]$id_str
[1] "413500801395736576"
$twitter_entities$media[[1]]$indices
[1] 33 55
$twitter_entities$media[[1]]$media_url
[1] "httpee://pbs.twimg.com/media/Bb0Myb2IQAAaexg.jpg"
$twitter_entities$media[[1]]$media_url_https
[1] "httpes://pbs.twimg.com/media/Bb0Myb2IQAAaexg.jpg"
$twitter_entities$media[[1]]$url
[1] "httpee://t.co/0EumsvUCuI"
$twitter_entities$media[[1]]$display_url
[1] "pic.twitter.com/0EumsvUCuI"
$twitter_entities$media[[1]]$expanded_url
[1] "httpee://twitter.com/JoeGoodman11/status/413500801899044864/photo/1"
$twitter_entities$media[[1]]$type
[1] "photo"
$twitter_entities$media[[1]]$sizes
$twitter_entities$media[[1]]$sizes$medium
$twitter_entities$media[[1]]$sizes$medium$w
[1] 600
$twitter_entities$media[[1]]$sizes$medium$h
[1] 339
$twitter_entities$media[[1]]$sizes$medium$resize
[1] "fit"
$twitter_entities$media[[1]]$sizes$thumb
$twitter_entities$media[[1]]$sizes$thumb$w
[1] 150
$twitter_entities$media[[1]]$sizes$thumb$h
[1] 150
$twitter_entities$media[[1]]$sizes$thumb$resize
[1] "crop"
$twitter_entities$media[[1]]$sizes$small
$twitter_entities$media[[1]]$sizes$small$w
[1] 340
$twitter_entities$media[[1]]$sizes$small$h
[1] 192
$twitter_entities$media[[1]]$sizes$small$resize
[1] "fit"
$twitter_entities$media[[1]]$sizes$large
$twitter_entities$media[[1]]$sizes$large$w
[1] 1023
$twitter_entities$media[[1]]$sizes$large$h
[1] 579
$twitter_entities$media[[1]]$sizes$large$resize
[1] "fit"
$twitter_filter_level
[1] "medium"
$twitter_lang
[1] "en"
$retweetCount
[1] 0
$gnip
$gnip$urls
$gnip$urls[[1]]
$gnip$urls[[1]]$url
[1] "httpee://t.co/0EumsvUCuI"
$gnip$urls[[1]]$expanded_url
[1] "httpee://twitter.com/JoeGoodman11/status/413500801899044864/photo/1"
$gnip$urls[[1]]$expanded_status
[1] 200
$gnip$language
$gnip$language$value
[1] "en"
and
tweets<-data$body
tweets
[1] "Hard at work studying for finals http://t.co/0EumsvUCuI"
The aim is for tweets to show the body field for all 500 tweets. Any help very gratefully received!
Your paste call is just concatenating the individual lines without inserting the correct JSON separators between them, so the parser stops after the first complete object. If you instead use something like
data <- fromJSON(sprintf("[%s]", paste(readLines(raw_data),collapse=",")))
then individual lines will get separated by a comma, and the whole thing will get wrapped in json's square-bracket notation for an array of objects. You can then extract a top-level property from each element of the data-array as
bodies <- sapply(data, "[[", "body")
Related
rvest returns empty list for html table
I really can't get my head around this problem and I would be grateful for any piece of advice that you could give me. I am trying to scrape the Bitcoin implied volatility index (BitVol) on this website: https://t3index.com/indices/bit-vol/ It is possible to show the raw values in the Chart via this button and by clicking on "View data table": The id of the relevant html table is "highcharts-data-table-1": I have used the rvest package to scrape this table. This is what I got so far: library(rvest) library(tidyverse) url5 <- "https://t3index.com/indices/bit-vol/" output <- url(url5) %>% read_html() %>% html_nodes(xpath='//*[@id="highcharts-data-table-1"]//table[1]') %>% html_table() The code runs smoothly without returning any errors but still the query returns an empty list in the variable output despite the fact that I have followed the recommendations in this article as well: rvest returning empty list This is the current R Version that I am using: $platform [1] "x86_64-w64-mingw32" $arch [1] "x86_64" $os [1] "mingw32" $system [1] "x86_64, mingw32" $status [1] "" $major [1] "4" $minor [1] "0.3" $year [1] "2020" $month [1] "10" $day [1] "10" $`svn rev` [1] "79318" $language [1] "R" $version.string [1] "R version 4.0.3 (2020-10-10)" $nickname [1] "Bunny-Wunnies Freak Out" Any help would be highly appreciated!
How to parse json with multiple keys in a column with R
I am new to R and now facing a problem parsing a json column in a dataset, I HAVE GONE THROUGH pretty much all the threads about parsing json, but I CANNOT find a proper solution...as I believe my problem is a little different: Here is my situation: I am using R to connect to a SQL database via ODBC && get a table I need: The TCbigdata is the target json column and the json looks like : { "memberid": "30325292", "hotelgroup": { "g_orders": "-1", "g_sfristcreatedate": "-1", "g_lastcreatedate": "-1", "g_slastcreatedate": "-1", "g_fristcreatedate": "-1" }, "visa": { "v_orders": "-1", "v_maxcountryid": "-1", "v_lastsorderdate": "-1", "v_maxvisaperson": "-1", "v_lastorderdate": "-1", "v_lastvisacountryid": "-1", "v_sorders": "-1" }, "callcentertel": { "lastcctzzycalldate": "-1", "ishavecctcomplaintcall": "-1", "lastcctchujingcalldate": "-1", "lastcctyouluncalldate": "-1" }....(key n, key n+1.. etc)..} ** My desire output would be all the nested vars , if possible, I want to DELETE memberid && hotelgroup && visa && callcentertel && etc such group keys , so, 1. parsing columns would be like " g_orders...v_orders..lastcct....etc" in one dataset without keys such as "hotelgroup","visa","callcentertel" ...etc...; 2. Or, parsing it into multiple datasets like "hotelgroup" table, COLUMN--"g_orders"+ "g_sfristcreatedate"..... "visa" table, COLUMN--"v_orders"+ "v_maxcountryid"..... I am not sure if there is a package for problem like this? ============ PROBLEM DESCRIPTION && DESIRE OUTPUT ================= I have searched several demonstrations using jsonlite/rjsonio/tidyjson , but failed to find a properway. 
**Another part I find confusing is, my dataset, which is from data warehouse via ODBC, return "factor" type of "TCbigdata", instead of "Character" as I assume: as what it is in DW: ================ MY CODE...TBC ======================== HERE IS MY CODE: # SQL TABLE orgtc <- sqlQuery(channel1,'SELECT idMemberInfo,memberid, refbizid, crttime, TCbigdata FROM tcbiz_fq_rcs_data.MemberInfo ') # Convert var_type orgjf$JFMemberPortrait<- as.character( orgjf$JFMemberPortrait ) # ????? ----library(jsonlite) l <- fromJSON(orgjf$JFMemberPortrait, simplifyDataFrame = FALSE) ---- TBD I appreciate your help!
Interesting question. There are really two pieces: getting the JSON out of the DW parsing the JSON into your desired output It looks like you have made decent progress getting the JSON out of the DW. I'm not sure what you are using to connect, but I would recommend using the new-ish odbc package, which has a nice DBI interface. (Remember that reproducible examples are important to getting help quickly). Once you have the data out of the DW, you should have something like the data_frame that I manufacture below. Further, if you want to use tidyjson (my preference), then you need to be aware that it is off of CRAN and the dev version at jeremystan/tidyjson has useful functionality (and is broken by the new dplyr). Here, I use the dev version from my repo: suppressPackageStartupMessages(library(tidyverse)) # devtools::install_github("colearendt/tidyjson") suppressPackageStartupMessages(library(tidyjson)) raw_json <- '{ "memberid": "30325292", "hotelgroup": { "g_orders": "-1", "g_sfristcreatedate": "-1", "g_lastcreatedate": "-1", "g_slastcreatedate": "-1", "g_fristcreatedate": "-1" }, "visa": { "v_orders": "-1", "v_maxcountryid": "-1", "v_lastsorderdate": "-1", "v_maxvisaperson": "-1", "v_lastorderdate": "-1", "v_lastvisacountryid": "-1", "v_sorders": "-1" }, "callcentertel": { "lastcctzzycalldate": "-1", "ishavecctcomplaintcall": "-1", "lastcctchujingcalldate": "-1", "lastcctyouluncalldate": "-1" } }' dw_data <- data_frame( idMemberInfo = c(1:10) , TCbigdata = as.character(lapply(c(1:10),function(x){return(raw_json)})) ) dw_data #> # A tibble: 10 x 2 #> idMemberInfo TCbigdata #> <int> <chr> #> 1 1 "{ … #> 2 2 "{ … #> 3 3 "{ … #> 4 4 "{ … #> 5 5 "{ … #> 6 6 "{ … #> 7 7 "{ … #> 8 8 "{ … #> 9 9 "{ … #> 10 10 "{ … # convert to tbl_json dw_json <- as.tbl_json(dw_data, json.column = "TCbigdata") # option 1 - let tidyjson do the work for you # - you will need to rename opt_1 <- dw_json %>% spread_all() names(opt_1) #> [1] "idMemberInfo" #> [2] "memberid" #> [3] 
"hotelgroup.g_orders" #> [4] "hotelgroup.g_sfristcreatedate" #> [5] "hotelgroup.g_lastcreatedate" #> [6] "hotelgroup.g_slastcreatedate" #> [7] "hotelgroup.g_fristcreatedate" #> [8] "visa.v_orders" #> [9] "visa.v_maxcountryid" #> [10] "visa.v_lastsorderdate" #> [11] "visa.v_maxvisaperson" #> [12] "visa.v_lastorderdate" #> [13] "visa.v_lastvisacountryid" #> [14] "visa.v_sorders" #> [15] "callcentertel.lastcctzzycalldate" #> [16] "callcentertel.ishavecctcomplaintcall" #> [17] "callcentertel.lastcctchujingcalldate" #> [18] "callcentertel.lastcctyouluncalldate" # for instance... as long as there are no conflicts rename_function <- function(x){ x[str_detect(x,"\\.")] <- str_sub(x[str_detect(x,"\\.")],str_locate(x[str_detect(x,"\\.")],"\\.")[,"start"]+1) return(x) } opt_1 %>% rename_all(.funs=list(rename_function)) %>% names() #> [1] "idMemberInfo" "memberid" #> [3] "g_orders" "g_sfristcreatedate" #> [5] "g_lastcreatedate" "g_slastcreatedate" #> [7] "g_fristcreatedate" "v_orders" #> [9] "v_maxcountryid" "v_lastsorderdate" #> [11] "v_maxvisaperson" "v_lastorderdate" #> [13] "v_lastvisacountryid" "v_sorders" #> [15] "lastcctzzycalldate" "ishavecctcomplaintcall" #> [17] "lastcctchujingcalldate" "lastcctyouluncalldate" # option 2 - define what you want # - more typing up front opt_2 <- dw_json %>% spread_values( g_orders = jstring(hotelgroup,g_orders) , g_sfristcreatedate = jstring(hotelgroup, g_sfristcreatedate) #... , lastcctzzycalldate = jstring(callcentertel, lastcctzzycalldate) #... ) names(opt_2) #> [1] "idMemberInfo" "g_orders" "g_sfristcreatedate" #> [4] "lastcctzzycalldate" Hope it helps! FWIW, I am hopeful of persisting the tidyjson-like behavior in the R community
Loading and converting json file in R
I have .json file which I need to load in R and perform further operations with it after converting it into a data-frame. Initials of my json file looks like this: {"_id":{"$oid":"57a30ce268fd0809ec4d194f"},"session":{"start_timestamp":{"$numberLong":"1470183490481"},"session_id":"def5faa9-20160803-001810481"},"metrics":{},"arrival_timestamp":{"$numberLong":"1470183523054"},"event_type":"OfferViewed","event_timestamp":{"$numberLong":"1470183505399"},"event_version":"3.0","application":{"package_name":"com.think.vito","title":"Vito","version_code":"5","app_id":"7ffa58dab3c646cea642e961ff8a8070","cognito_identity_pool_id":"us-east-1:4d9cf803-0487-44ec-be27-1e160d15df74","version_name":"2.0.0.0","sdk":{"version":"2.2.2","name":"aws-sdk-android"}},"client":{"cognito_id":"us-east-1:2e26918b-f7b1-471e-9df4-b931509f7d37","client_id":"ee0b61b0-85cf-4b2f-960e-e2aedef5faa9"},"device":{"locale":{"country":"US","code":"en_US","language":"en"},"platform":{"version":"5.1.1","name":"ANDROID"},"make":"YU","model":"AO5510"},"attributes":{"Category":"120000","CustomerID":"4078","OfferID":"45436"}} Above sample is just one id, session, metrics and there are many like that. I tried converting it using rjson library in R as follows. 
events_json.json is the filename: library(rjson) result <- fromJSON(file = "events_json.json") print(result) $`_id` $`_id`$`$oid` [1] "57a30ce268fd0809ec4d194f" $session $session$start_timestamp $session$start_timestamp$`$numberLong` [1] "1470183490481" $session$session_id [1] "def5faa9-20160803-001810481" $metrics list() $arrival_timestamp $arrival_timestamp$`$numberLong` [1] "1470183523054" $event_type [1] "OfferViewed" $event_timestamp $event_timestamp$`$numberLong` [1] "1470183505399" $event_version [1] "3.0" $application $application$package_name [1] "com.think.vito" $application$title [1] "Vito" $application$version_code [1] "5" $application$app_id [1] "7ffa58dab3c646cea642e961ff8a8070" $application$cognito_identity_pool_id [1] "us-east-1:4d9cf803-0487-44ec-be27-1e160d15df74" $application$version_name [1] "2.0.0.0" $application$sdk $application$sdk$version [1] "2.2.2" $application$sdk$name [1] "aws-sdk-android" $client $client$cognito_id [1] "us-east-1:2e26918b-f7b1-471e-9df4-b931509f7d37" $client$client_id [1] "ee0b61b0-85cf-4b2f-960e-e2aedef5faa9" $device $device$locale $device$locale$country [1] "US" $device$locale$code [1] "en_US" $device$locale$language [1] "en" $device$platform $device$platform$version [1] "5.1.1" $device$platform$name [1] "ANDROID" $device$make [1] "YU" $device$model [1] "AO5510" $attributes $attributes$Category [1] "120000" $attributes$CustomerID [1] "4078" $attributes$OfferID [1] "45436" But it's just showing/reading the first row as I mentioned above. There are many more ids, sessions, metrics, event_types, etc. which it is not showing. Please help: how can I read my whole JSON file so that I can see the other rows as well and convert it into a proper data frame? UPDATE: I have found the solution. Using the ndjson package I get the desired data frame. library(ndjson) df<-ndjson::stream_in('events_data.json')
Your file is not a single JSON object, but rather a list of JSON objects, one per line. You have to read each line and parse each one as JSON separately. One way to do that is: d <- lapply(strsplit(readLines("events_data2.json"),"\n"), fromJSON) Hope this helps.
Parse Multiple JSON Objects of Same Type in R
I have two objects of the same type in JSON: json <- '[{"client":"ABC Company","totalUSD":1870.0000,"durationDays":365,"familySize":4,"assignmentType":"Long Term","homeLocation":"Chicago, IL","hostLocation":"Lyon, France","serviceName":"Service ABC","homeLocationGeoLat":41.8781136,"homeLocationGeoLng":-87.6297982,"hostLocationGeoLat":45.764043,"hostLocationGeoLng":4.835659},{"client":"ABC Company","totalUSD":21082.0000,"durationDays":365,"familySize":4,"assignmentType":"Long Term","homeLocation":"Chicago, IL","hostLocation":"Lyon, France","serviceName":"Service ABC","homeLocationGeoLat":41.8781136,"homeLocationGeoLng":-87.6297982,"hostLocationGeoLat":45.764043,"hostLocationGeoLng":4.835659}]' How can I parse both objects unto the same data.frame such that I have two rows that share the same columns? To put that another way, I have a list of JSON objects that I am trying to parse into a data.frame. I have tried this: p <- rjson::newJSONParser() p$addData(json) df <- p$getObject() This seems to return a list whereas I am wanting a data.frame: > df [[1]] [[1]]$client [1] "ABC Company" [[1]]$totalUSD [1] 1870 [[1]]$durationDays [1] 365 [[1]]$familySize [1] 4 [[1]]$assignmentType [1] "Long Term" [[1]]$homeLocation [1] "Chicago, IL" [[1]]$hostLocation [1] "Lyon, France" [[1]]$serviceName [1] "Service ABC" [[1]]$homeLocationGeoLat [1] 41.87811 [[1]]$homeLocationGeoLng [1] -87.6298 [[1]]$hostLocationGeoLat [1] 45.76404 [[1]]$hostLocationGeoLng [1] 4.835659 [[2]] [[2]]$client [1] "ABC Company" [[2]]$totalUSD [1] 21082 [[2]]$durationDays [1] 365 [[2]]$familySize [1] 4 [[2]]$assignmentType [1] "Long Term" [[2]]$homeLocation [1] "Chicago, IL" [[2]]$hostLocation [1] "Lyon, France" [[2]]$serviceName [1] "Service ABC" [[2]]$homeLocationGeoLat [1] 41.87811 [[2]]$homeLocationGeoLng [1] -87.6298 [[2]]$hostLocationGeoLat [1] 45.76404 [[2]]$hostLocationGeoLng [1] 4.835659 How can I parse this list of JSON objects?
EDIT: In this case, you want do.call and rbind: do.call(rbind.data.frame, rjson::fromJSON(json)) or using your method: p <- rjson::newJSONParser() p$addData(json) df <- p$getObject() do.call(rbind, df)
Problems extracting variables from a JSON http-source with RJSONIO
I am trying to retrieve the longitude and latitude of a city. I have created a script that uses JSON via the openstreetmap.org. library("RJSONIO") CityName <- "Rotterdam" CountryCode <- "NL" CityName <- gsub(' ','%20',CityName) url <- paste( "http://nominatim.openstreetmap.org/search?city=" , CityName , "&countrycodes=" , CountryCode , "&limit=1&format=json" , sep="") x <- fromJSON(url,simplify=FALSE) x [[1]] [[1]]$place_id [1] "98036666" [[1]]$licence [1] "Data © OpenStreetMap contributors, ODbL 1.0. http://www.openstreetmap.org/copyright" [[1]]$osm_type [1] "relation" [[1]]$osm_id [1] "324431" [[1]]$boundingbox [[1]]$boundingbox[[1]] [1] "51.842113494873" [[1]]$boundingbox[[2]] [1] "52.0045318603516" [[1]]$boundingbox[[3]] [1] "3.94075202941895" [[1]]$boundingbox[[4]] [1] "4.60184574127197" [[1]]$lat [1] "51.9228958" [[1]]$lon [1] "4.4631727" [[1]]$display_name [1] "Rotterdam, Stadsregio Rotterdam, Zuid-Holland, Nederland" [[1]]$class [1] "boundary" [[1]]$type [1] "administrative" [[1]]$icon [1] "http://nominatim.openstreetmap.org/images/mapicons/poi_boundary_administrative.p.20.png" I would like to extract the $lon and $lat, but when I do this I get the following error messages: x$lon NULL x$lat NULL Does anybody have an idea what I do wrong and therefore do not get the expected result, which would look like: x$lon 4.4631727 x$lat 51.9228958 Any suggestions? Thanks!
The result is a list containing one element, so you need to access that first element before extracting its fields: > x[[1]]$lat [1] "51.9228958" > x[[1]]$lon [1] "4.4631727"