How to read nested JSON structure? - json

I have some JSON that looks like this:
"total_rows":141,"offset":0,"rows":[
{"id":"1","key":"a","value":{"SP$Sale_Price":"240000","CONTRACTDATE$Contract_Date":"2006-10-26T05:00:00"}},
{"id":"2","key":"b","value":{"SP$Sale_Price":"2000000","CONTRACTDATE$Contract_Date":"2006-08-22T05:00:00"}},
{"id":"3","key":"c","value":{"SP$Sale_Price":"780000","CONTRACTDATE$Contract_Date":"2007-01-18T06:00:00"}},
...
In R, what would be the easiest way to produce a scatter-plot of SP$Sale_Price versus CONTRACTDATE$Contract_Date?
I got this far:
install.packages("rjson")
library("rjson")
json_file <- "http://localhost:5984/testdb/_design/sold/_view/sold?limit=100"
json_data <- fromJSON(file=json_file)
install.packages("plyr")
library(plyr)
asFrame <- do.call("rbind.fill", lapply(json_data, as.data.frame))
but now I'm stuck...
> plot(CONTRACTDATE$Contract_Date, SP$Sale_Price)
Error in plot(CONTRACTDATE$Contract_Date, SP$Sale_Price) :
object 'CONTRACTDATE' not found
How to make this work?

Suppose you have the following JSON-file:
txt <- '{"total_rows":141,"offset":0,"rows":[
{"id":"1","key":"a","value":{"SP$Sale_Price":"240000","CONTRACTDATE$Contract_Date":"2006-10-26T05:00:00"}},
{"id":"2","key":"b","value":{"SP$Sale_Price":"2000000","CONTRACTDATE$Contract_Date":"2006-08-22T05:00:00"}},
{"id":"3","key":"c","value":{"SP$Sale_Price":"780000","CONTRACTDATE$Contract_Date":"2007-01-18T06:00:00"}}]}'
Then you can read it as follows with the jsonlite package:
library(jsonlite)
# flatten = TRUE turns the nested "value" fields into ordinary columns of
# the rows data frame
json_data <- fromJSON(txt, flatten = TRUE)
# get the needed dataframe
dat <- json_data$rows
# set convenient names for the columns
# (the conversions below refer to these names)
names(dat) <- c("id", "key", "sale_price", "contract_date")
# the prices arrive as strings; make them numeric so they plot as numbers
dat$sale_price <- as.numeric(dat$sale_price)
# convert the 'contract_date' column to a datetime format
# as.POSIXct() is used instead of strptime() because strptime() returns a
# list-based POSIXlt object, which is not meant to be a data frame column
dat$contract_date <- as.POSIXct(
  dat$contract_date,
  format = "%Y-%m-%dT%H:%M:%S",
  tz = "GMT"
)
Now you can plot:
plot(dat$contract_date, dat$sale_price)
Which gives:
If you choose not to flatten the JSON, you can do:
# Without flattening, the two fields are read from the "value" element of
# the rows and converted separately before plotting.
json_data <- fromJSON(txt)
dat <- json_data[["rows"]][["value"]]
# prices: strings -> integers (base 10)
sp <- strtoi(dat[["SP$Sale_Price"]], base = 10L)
# contract dates: ISO-like strings -> date-times in GMT
cd <- strptime(
  dat[["CONTRACTDATE$Contract_Date"]],
  format = "%Y-%m-%dT%H:%M:%S",
  tz = "GMT"
)
plot(cd, sp)
Which gives the same plot:

I found a way that doesn't discard the field names:
# One-time installation. curl is installed alongside jsonlite, presumably so
# that jsonlite can fetch the URL held in json_file (confirm against the
# jsonlite documentation).
install.packages("jsonlite")
install.packages("curl")
# install.packages() does not attach a package, and rjson (attached earlier)
# also provides a fromJSON(), so the jsonlite version is called explicitly.
json <- jsonlite::fromJSON(json_file)
# the "rows" element holds one record per row of the view
r <- json$rows
At this point r looks like this:
> class(r)
[1] "data.frame"
> colnames(r)
[1] "id" "key" "value"
After some more Googling and trial-and-error I landed on this:
# Take the nested "value" data frame out of the rows, convert the price
# strings to integers and the date strings to date-times (GMT), then plot.
f <- r[["value"]]
sp <- strtoi(f$`SP$Sale_Price`)
cd <- strptime(
  f$`CONTRACTDATE$Contract_Date`,
  format = "%Y-%m-%dT%H:%M:%S",
  tz = "GMT"
)
plot(cd, sp)
And the result on my full data-set...

Related

Reading complex json data as dataframe in R

I have the following json data:
json_data <- data.frame(changedContent=c('{"documents":[],"images":[],"profileCommunications":[],"shortListedProfiles":[],"matrimonyUser":{"createdBy":null,"parentMatrimonyUserId":0,"userSalutationVal":"Mr.","matrimonyUserCode":"173773","matrimonyUserName":"SUDIPTO DEB BARMAN","emailAddress":"sudipto06#yahoo.com","contactNumber":"9434944429","emailOTP":"","mobilePhoneOTP":"","isEmailOTPVerified":1,"isMobilePhoneOTPverified":1,"isHideContact":null,"isHideEmail":null,"lastLogInTime":null,"userSessionDtlId":null,"modifiedBy":4444,"modifiedOn":1440167133028,"isDeleted":null,"isActive":1,"isAllowedLogin":null,"numberOfChildProfile":null,"matrimonyUserTypeId":100000006,"matrimonyUserTypeVal":"Online Customer","onlineStatusFlag":null,"lastSystemTransactionDateTime":null,"isLive":null,"mobileCountryCode":0,"userStatusIdValue":"Registered and Verified","crmUserStatusIdValue":null,"deactivateReasonIdValue":null,"deactivateReason":null,"matrimonyUserId":165614,"userSalutationId":100001617,"userStatusId":100002760,"crmUserStatusId":null,"deactivateReasonId":null,"createdOn":null},"aboutMes":[],"partnerPreference":{"isSubcastDealbreaker":null,"isOccupationDealbreaker":null,"isIndustryDealbreaker":null,"isIncomeDealbreaker":null,"isHeightDealbreaker":null,"isBodyTypeDealbreaker":null,"isHivDealbreaker":null,"isFamilyTypeDealbreaker":null,"isFamilyIncomeDealbreaker":null,"isDrinkingDealbreaker":null,"locationTypeIds":null,"isLocationTypeDealbreaker":null,"isLocationNameDealbreaker":null,"locationNameOthers":"","isMaritalStatusDealbreaker":null,"isSmokingDealbreaker":null,"isFoodHabitsDealbreaker":null,"isGothraDealbreaker":null,"isManglikDealbreaker":null,"isProfileCreatedbyDealbreaker":null,"religionIdsValues":"","casteIdsValues":null,"motherTongueIdsValues":"","minimumEducationValues":"","occupationIdsValues":"","industryIdsValues":"","bodyTypeIdsValues":"","hivIdValue":null,"familyTypeIdsValues":"","familyIncomeValues":"","drinkingIdValues":"","locationNameIdsValues":null,"mari
talStatusIdsValues":"","smokingIdsValues":"","foodHabitsIdsValues":"","gothraIdsValues":"","manglikIdValue":null,"profileCreatedbyValues":"","heightFrom":null,"heightTo":null,"createdBy":4444,"userSessionDtlId":null,"modifiedBy":4444,"modifiedOn":1440167133115,"isDeleted":null,"isActive":1,"partnerPreferenceId":2757,"isReligionDealbreaker":null,"casteIds":null,"isCasteDealbreaker":null,"isMotherTongueDealbreaker":null,"subcaste":"","religionIds":null,"motherTongueIds":null,"minimumEducation":null,"occupationIds":null,"industryIds":null,"bodyTypeIds":null,"income":null,"incomeValues":"","familyIncome":null,"hivId":0,"familyTypeIds":null,"drinkingId":null,"locationNameIds":"","maritalStatusIds":null,"smokingIds":null,"foodHabitsIds":null,"gothraIds":null,"manglikId":0,"profileCreatedby":null,"adbCount":0,"fifCount":0,"ageFrom":null,"ageTo":null,"isAgeDealbreaker":null,"isminimumEducationDealbreaker":null,"userId":165614,"createdOn":1440167133115,"height":null},"profileAgentDtl":{"campaignId":"","acquirerCode":0,"createdBy":4444,"modifiedBy":4444,"modifiedOn":1440167133110,"isDeleted":null,"isActive":1,"relationshipMangerId":0,"sourceCode":100000004,"userId":165614,"createdOn":1440167133110,"idOdNo":"","relationshipMangerName":null,"relationshipMangerContact":"","profileAgentDtlId":2757,"dateOfEntry":1437935400000,"formSerialNo":"3661","sourceCodeVal":null,"agentCode":null,"acquirerCodeVal":null,"agentName":"","agentMobileNo":"","adBookingNo":""},"profileBasicRegistrationDtl":{"sourceId":null,"createdBy":4444,"userSessionDtlId":null,"modifiedBy":4444,"modifiedOn":1440167133109,"isDeleted":null,"isActive":1,"genderId":100000596,"priorityId":100001671,"profileCreatedById":100000590,"webSourceId":100001672,"dob":null,"genderVal":"Male","userId":165614,"profileCompleteness":null,"createdOn":1440167133109,"profileDtlId":2757,"nickName":null,"relation":null,"regViewersCount":null,"guestViewersCount":null,"trustScore":20,"webSourceVal":"Newspaper 
","priorityVal":"Medium","profileCreatedByval":"Self","fieldContentModerationStatusId":null,"photoModerationStatusId":null,"documentModerationStatusId":null,"isPhotoHide":null,"isHoroscopeHide":null},"profileAstrologyDtl":{"createdBy":4444,"userSessionDtlId":null,"modifiedBy":4444,"modifiedOn":1440167133111,"isDeleted":null,"isActive":1,"userId":165614,"createdOn":1440167133111,"profileAstrologyDtlId":2757,"gothraId":0,"gaanId":0,"nakshatraId":0,"sunSignId":0,"moonSignId":0,"manglikFlagId":0,"placeOfBirth":"0","timeOfBirth":null,"isPreferredPartnerDtl":null,"gothraVal":"","gaanVal":"","nakshatraVal":"","sunSignVal":"","moonSignVal":"","manglikFlagVal":""},"profileFamilyDtl":{"permanentAddress":null,"createdBy":4444,"userSessionDtlId":null,"modifiedBy":4444,"modifiedOn":1440167133111,"isDeleted":null,"isActive":1,"familyIncome":0.0,"fathersStatusId":0,"mothersStatusId":0,"fathersOccupationId":0,"mothersOccupationId":0,"mothersIndustryId":null,"fathersIndustryId":null,"familyTypeId":0,"familyValueId":0,"familyKindId":0,"familyStatusId":0,"userId":165614,"createdOn":1440167133111,"moderatedOn":null,"profileFamilyDtlId":2757,"fathersName":"","fathersStatusVal":null,"motherName":"","mothersStatusVal":null,"numberOfSibling":0,"shortRefModerationStatus":null,"fathersOccupationVal":null,"mothersOccupationVal":null,"familyTypeVal":null,"familyValueVal":null,"familyKindVal":null,"familyStatusVal":null,"mothersIndustryVal":null,"fathersIndustryVal":null,"familyIncomeVal":"","moderatedBy":null,"moderatorRemarks":null,"ref1fullName":null,"ref1relationship":null,"ref1emailId":null,"ref1phoneNo":null,"ref1remarks":null,"ref2fullName":null,"ref2relationship":null,"ref2emailId":null,"ref2phoneNo":null,"ref2remarks":null},"profileLifestyleDtl":{"createdBy":4444,"userSessionDtlId":null,"modifiedBy":4444,"modifiedOn":1440167133110,"isDeleted":null,"isActive":1,"favouriteBooksTypeIds":null,"favouriteHobbiesTypeIds":null,"favouriteMoviesTypeIds":null,"favouriteMusicTypeIds":null,"favouri
teSportsTypeIds":null,"livingInHouseTypeId":0,"vehicleTypeOwnedId":0,"petsId":0,"drinkingStatusId":0,"numberOfKids":0,"userId":165614,"createdOn":1440167133110,"moderatedOn":null,"moderatedBy":null,"isModerated":null,"moderatorRemarks":null,"profileLifestyleDtlId":2757,"smokingStatusId":0,"foodHabitsId":0,"financialPlansId":0,"retirementPlansId":0,"vehicleDescription":null,"vehicleNumber":0,"childrenDesiredId":null,"isReligionImportantFlagId":null,"religiousBeliefs":0,"smokingStatusVal":"","drinkingStatusVal":null,"foodHabitsVal":"","financialPlansVal":null,"retirementPlansVal":null,"vehicleTypeOwnedVal":null,"livingInHouseTypeVal":null,"petsVal":null,"childrenDesiredVal":null,"favouriteBooksTypeVals":"","favouriteMoviesTypeVals":"","favouriteMusicTypeVals":"","favouriteSportsTypeVals":"","favouriteHobbiesTypeVals":"","isReligionImportantFlagVal":null,"religiousBeliefsVal":"","favouriteHobbiesRating":null,"favouriteHobbiesDescription":null,"noOfKidsVal":null},"profileOccupationEducationDtl":{"highestSpecializationVal":null,"highestSpecializationOthersVal":"","createdBy":4444,"userSessionDtlId":null,"modifiedBy":4444,"modifiedOn":1440167133110,"isDeleted":null,"isActive":1,"highestEducationId":null,"occupationId":null,"designationId":null,"incomeCurrencyId":null,"education2id":0,"education3id":0,"specialization2id":0,"specialization3id":0,"highestSpecializationId":null,"industryId":null,"annualIncome":null,"userId":165614,"createdOn":1440167133110,"moderatedOn":null,"moderatedBy":null,"isModerated":null,"moderatorRemarks":null,"highestEducationVal":null,"occupationVal":null,"industryVal":null,"incomeCurrencyVal":null,"designationVal":null,"education3val":null,"education2val":null,"specialization2val":null,"specialization2othersVal":"","specialization3val":null,"specialization3othersVal":"","additionalQualification":null,"professionalQualification":null,"occupationOthersVal":"","departmentId":null,"employmentSectorId":null,"companyName":"","highestEducationInstituteVa
l":null,"education2instituteVal":"0","education3instituteVal":"","professionalQualificationVal":null,"departmentVal":null,"employmentSectorVal":null,"annualIncomeVal":null,"profileOccupationEducationDtlId":2757,"schoolName2":"","schoolName1":"","education2instituteId":null,"education3instituteId":null},"profilePersonalDtl":{"createdBy":4444,"userSessionDtlId":null,"modifiedBy":4444,"modifiedOn":1440167133110,"isDeleted":null,"isActive":1,"familyOriginId":0,"stateId":null,"countryId":null,"numChildrenProspect":0,"countryVal":null,"stateVal":null,"landmark":null,"locationVal":null,"userId":165614,"locationId":null,"religionId":100000598,"createdOn":1440167133110,"isPreferredPartnerDtl":null,"maritalStatusId":null,"maritalStatusVal":null,"subCaste":"","profilePersonalDtlId":2757,"motherTongueId":100000618,"casteId":null,"marryOutsideCasteId":0,"familyOriginVal":null,"facebookHandle":"","linkedInHandle":"","twiterHandle":null,"googlePlus":null,"casteText":"Kshatriya","homeTownText":"0","religionVal":"Hindu","motherTongueVal":"Bengali","marryOutsideCasteVal":"","isSocialMediaVerified":null,"numChildrenProspectVal":null,"locality":null},"profilePhysicalAttributesDtl":{"createdBy":4444,"userSessionDtlId":null,"modifiedBy":4444,"modifiedOn":1440167133110,"isDeleted":null,"isActive":1,"hivId":0,"bodyTypeId":0,"complexionId":0,"bloodGroupId":0,"userId":165614,"createdOn":1440167133110,"height":null,"isPreferredPartnerDtl":null,"hairColourId":0,"eyeColourId":0,"hairLengthId":0,"physicalStatusId":null,"disabilitiesVal":"","hivVal":"","knownAilmentVal":"","bodyTypeVal":null,"complexionVal":null,"hairColourVal":"","eyeColourVal":"","hairLengthVal":"","physicalStatusVal":null,"bloodGroupVal":null,"profilePhysicalAttributesDtlId":2757,"weight":null},"profileSiblingsDtl":null,"profileImageDtl":null,"notes":[{"createdBy":4444,"modifiedBy":4444,"modifiedOn":1440167133115,"isDeleted":null,"isActive":1,"userId":165614,"createdOn":1440167133115,"profileNotesDtlId":3499,"notesDescription"
:""}],"references":[],"relationOthers":[],"photoIdentificationDetails":null,"preModAboutMes":[{"answer":"null ","preModerationAboutMeId":1439283144614540579,"moderationStatus":1,"createdBy":4444,"questionVal":null,"userSessionDtlId":null,"modifiedBy":4444,"modifiedOn":1440167133092,"isActive":1,"isAnswerChange":0,"userId":165614,"questionId":1,"createdOn":1440167133092},{"answer":"null ","preModerationAboutMeId":1439283144614540580,"moderationStatus":1,"createdBy":4444,"questionVal":null,"userSessionDtlId":null,"modifiedBy":4444,"modifiedOn":1440167133093,"isActive":1,"isAnswerChange":0,"userId":165614,"questionId":2,"createdOn":1440167133093},{"answer":"null ","preModerationAboutMeId":1439283144614540581,"moderationStatus":1,"createdBy":4444,"questionVal":null,"userSessionDtlId":null,"modifiedBy":4444,"modifiedOn":1440167133094,"isActive":1,"isAnswerChange":0,"userId":165614,"questionId":3,"createdOn":1440167133094},{"answer":"null ","preModerationAboutMeId":1439283144614540582,"moderationStatus":1,"createdBy":4444,"questionVal":null,"userSessionDtlId":null,"modifiedBy":4444,"modifiedOn":1440167133094,"isActive":1,"isAnswerChange":0,"userId":165614,"questionId":4,"createdOn":1440167133094}],"preModContent":[{"preModerationContentId":1439307323336466240,"isChangeMatrimonyUserName":null,"isChangeLocality":0,"isChangeLandmark":0,"permanentAddress":"Dev Barman,Mayapur,PO-Talbagicha,Kharadpur-721306","isChangePermanentAddress":1,"nameOfInstitutionHighestEducation":"0","highestSpecializationVal":null,"highestSpecializationOthersVal":null,"createdBy":4444,"matrimonyUserName":"SUDIPTO DEB 
BARMAN","userSessionDtlId":null,"modifiedBy":4444,"modifiedOn":1440167133104,"isDeleted":null,"isActive":1,"highestEducationId":0,"occupationId":0,"designationId":0,"incomeCurrencyId":null,"highestSpecializationId":0,"industryId":0,"annualIncome":0.0,"stateId":100000269,"countryId":100000101,"dob":520972200000,"countryVal":null,"stateVal":null,"landmark":"","userId":165614,"moderationStatusId":null,"createdOn":1440167133104,"isChangeNameOfInstitutionHighestEducation":0,"isChangeHighestSpecialization":0,"highestEducationVal":null,"isChangeHighestEducation":0,"occupationVal":null,"isChangeOccupation":0,"industryVal":"","isChangeIndustry":0,"incomeCurrencyVal":null,"isChangeIncomeCurrency":0,"customerTypeId":null,"customerTypeVal":null,"isChangeCustomerType":1,"isChangeDob":1,"maritalStatusId":100000900,"maritalStatusVal":"Never Married","isChangeMaritalStatus":1,"isChangeCountry":1,"isChangeState":1,"cityId":0,"isChangeCity":0,"cityVal":null,"isChangeAnnualIncome":0,"designationVal":null,"isChangeDesignation":0,"subCaste":null,"hometown":null,"isChangeSubCaste":null,"isChangeHometown":null,"ref1fullName":null,"isChangeRef1fullName":null,"ref1relationship":null,"isChangeRef1relationship":null,"ref1emailId":null,"isChangeRef1emailId":null,"ref1phoneNo":null,"isChangeRef1phoneNo":null,"ref1remarks":null,"isChangeRef1remarks":null,"ref2fullName":null,"isChangeRef2fullName":null,"ref2relationship":null,"isChangeRef2relationship":null,"ref2emailId":null,"isChangeRef2emailId":null,"ref2phoneNo":null,"isChangeRef2phoneNo":null,"ref2remarks":null,"isChangeRef2remarks":null,"typeOfCustomer":null,"isChangeTypeOfCustomer":null,"highestEducationInstituteId":null,"typeOfCustomerId":100000006,"locality":""}],"preModReferences":[],"preModShortReferences":[{"moderationStatus":null,"createdBy":4444,"userSessionDtlId":null,"modifiedBy":4444,"modifiedOn":1440167133099,"isDeleted":null,"isActive":1,"userId":165614,"createdOn":1440167133099,"isModerated":null,"premoderationprofileImageDtlI
d":1772,"ref1fullName":"","isChangeRef1fullName":0,"ref1relationship":"","isChangeRef1relationship":0,"ref1emailId":"","isChangeRef1emailId":0,"ref1phoneNo":null,"isChangeRef1phoneNo":0,"ref1remarks":null,"ref2fullName":"","isChangeRef2fullName":0,"ref2relationship":"","isChangeRef2relationship":0,"ref2emailId":"","isChangeRef2emailId":0,"ref2phoneNo":null,"isChangeRef2phoneNo":0,"ref2remarks":null}],"paymentTransactions":[],"userPlanMappings":[],"userFeatureMappings":[],"userPlanMapping":null,"blockedProfiles":[],"notMyTypeProfiles":[]}')
I want to convert the above into a convenient data frame with one row for each MatrimonyUserId. I have tried a few things but have been unable to get it into the desired format.
Assuming you can wrangle the json data into a nested list....
x <- jsonlite::fromJSON(jsontext)
I've found it's easiest to parse complex list structures by using the pipe operator and frequently checking the structure (limited to 1 or 2 levels).
# Print the structure of an object, limited to one or two levels of nesting,
# so that deeply nested lists stay readable.
str1 <- function(x) str(x, max.level = 1)
str2 <- function(x) str(x, max.level = 2)
# for pipe operator (the package is called magrittr)
library("magrittr")
# top level of the parsed object
x %>% str1
# two levels of its first element
x %>% .[[1]] %>% str2
Etc.

How to combine multiple JSON files into a single file in R

I have three JSON files:
json1 contains [[1,5],[5,7],[8,10]]
json2 contains [[5,6],[4,5],[5,8]]
json3 contains [[4,7],[3,4],[4,8]]
I want to merge them into one single file jsonmerge:
[[[1,5],[5,7],[8,10]],[[5,6],[4,5],[5,8]],[[4,7],[3,4],[4,8]]]
I tried concatenate but it gave results in this format
[[5,6],[4,5],[5,8]],
[[5,6],[4,5],[5,8]],
[[4,7],[3,4],[4,8]]
Any suggestions?
thanks in advance.
If you are using the rjson package, then you need to concatenate them into a list:
library(rjson)
# read each file into its own R object
json1 <- fromJSON(file = "json1")
json2 <- fromJSON(file = "json2")
json3 <- fromJSON(file = "json3")
# wrap the three objects in a list so they become one outer JSON array
jsonl <- list(json1, json2, json3)
# serialize the combined list (jsonl), not the not-yet-defined jsonc
jsonc <- toJSON(jsonl)
jsonc
[1] "[[[1,5],[5,7],[8,10]],[[5,6],[4,5],[5,8]],[[4,7],[3,4],[4,8]]]"
write(jsonc, file = "jsonc")
If you have many files, you can put them in a vector and use lapply to save some typing:
# File names json1, json2, json3
files <- paste0("json", 1:3)
# parse every file; the result is a list with one element per file
jsonl <- lapply(files, function(path) {
  fromJSON(file = path)
})
# serialize the list as a single JSON array and write it out
jsonc <- toJSON(jsonl)
write(jsonc, file = "jsonc")

converting a column in json format into a new data frame

I have a csv file and one of the columns is in json format.
that particular column in json format looks like this:
{"title":" ","body":" ","url":"thedailygreen print this healthy eating eat safe Dirty Dozen Foods page all"}
I have read this file using read.csv in R. Now, how do I create a new data frame from this column, with the field names title, body and url?
You can use package RJSONIO to parse the column values, e.g. :
library(RJSONIO)
# create an example data.frame with a json column
cell1 <- '{"title":"A","body":"X","url":"http://url1.x"}'
cell2 <- '{"title":"B","body":"Y","url":"http://url2.y"}'
cell3 <- '{"title":"C","body":"Z","url":"http://url3.z"}'
# spell out FALSE: F is an ordinary variable that can be reassigned
df <- data.frame(jsoncol = c(cell1, cell2, cell3), stringsAsFactors = FALSE)
# parse json and create a data.frame:
# each cell is parsed and turned into a list, and the lists are row-bound
res <- do.call(
  rbind.data.frame,
  lapply(df$jsoncol, FUN = function(x) {
    as.list(fromJSON(x))
  })
)
> res
title body url
A X http://url1.x
B Y http://url2.y
C Z http://url3.z
N.B. :
the code above assumes all the cells contain title, body and url only. If there can be other properties in the json cells, use this code instead :
# Parse every JSON cell, build a one-row data frame from just the three
# wanted fields of each parsed value, and stack the rows.
vals <- lapply(df$jsoncol, fromJSON)
res <- do.call(
  rbind,
  lapply(vals, FUN = function(v) {
    data.frame(
      title = v["title"],
      body = v["body"],
      url = v["url"]
    )
  })
)
EDIT (as per comment):
I've read the file using the following code :
# Read the tab-separated file with a header row, keeping every column as
# character. TRUE is spelled out because T is a reassignable variable.
df <- read.table(
  file = "c:\\sample.tsv",
  header = TRUE,
  sep = "\t",
  colClasses = "character"
)
then parsed using this code :
# Replace a NULL value with NA; any other value is returned unchanged.
naIfnull <- function(x) {
  if (is.null(x)) {
    return(NA)
  }
  x
}
# Parse the JSON held in the 'boilerplate' column, then build one row per
# parsed value; a field that is absent (NULL) becomes NA via naIfnull().
vals <- lapply(df$boilerplate, fromJSON)
res <- do.call(
  rbind,
  lapply(vals, FUN = function(v) {
    # work on a list so that fields can be looked up with $
    v <- as.list(v)
    data.frame(
      title = naIfnull(v$title),
      body = naIfnull(v$body),
      url = naIfnull(v$url)
    )
  })
)

Saving a JSON object to file.JSON

I've created a JSON file, and I need to be able to share the file via email with other collaborators. However, although there are plenty of topics available on handling JSON objects in the R workspace, there are virtually no resources discussing how to actually export a JSON object to a .JSON file.
Here's a simple example:
list1 <- vector(mode="list", length=2)
list1[[1]] <- c("a", "b", "c")
list1[[2]] <- c(1, 2, 3)
exportJson <- toJSON(list1)
## Save the JSON to file
save(exportJson, file="export.JSON")
## Attempt to read in the JSON
library("rjson")
json_data <- fromJSON(file="export.JSON")
The final line, attempting to read in the JSON file, results in an error: "Error in fromJSON(file = "export.JSON") : unexpected character 'R'"
Obviously the save() function is not the way to go, but after extensive googling, I have found nothing that says how to export the JSON to a file. Any help would be greatly appreciated.
You can use write:
library(RJSONIO)
# Two-element unnamed list: a character vector followed by a numeric vector.
list1 <- list(
  c("a", "b", "c"),
  c(1, 2, 3)
)
exportJson <- toJSON(list1)
> exportJson
[1] "[\n [ \"a\", \"b\", \"c\" ],\n[ 1, 2, 3 ] \n]"
write(exportJson, "test.json")
library("rjson")
json_data <- fromJSON(file="test.json")
> json_data
[[1]]
[1] "a" "b" "c"
[[2]]
[1] 1 2 3
There is also the jsonlite package:
library(jsonlite)
exportJSON <- toJSON(list1)
write(exportJSON, "test.json")
list2 <- fromJSON("test.json")
identical(list1, list2)

JSON to R for Data Mining

I am trying to grab tweets using the Topsy Otter api, so I can perform some data mining on it for my dissertation.
So far, I have got:
library(RJSONIO)
library(RCurl)
tweet_data <- getURL("http://otter.topsy.com/search.json?q=PSN&mintime=1301634000&perpage=10&maxtime=1304226000&apikey=xxx")
fromJSON(tweet_data)
Which works fine. Now however, I want to return just a couple details from this file, 'content' and 'trackback_date'. I cannot seem to figure out how - I have tried cobbling a couple of examples together, but unable to extract what I want.
Here is what I've tried so far:
trackback_date <- lapply(tweet_data$result, function(x){x$trackback_date})
content <- lapply(tweet_data$result, function(x){x$content})
Any help would be greatly appreciated, thank you.
edit
I have also tried:
library("rjson")
# use rjson
tweet_data <- fromJSON(paste(readLines("http://otter.topsy.com/search.json?q=PSN&mintime=1301634000&perpage=10&maxtime=1304226000&apikey=xxx"), collapse=""))
# get a data from Topsy Otter API
# convert JSON data into R object using fromJSON()
trackback_date <- lapply(tweet_data$result, function(x){x$trackback_date})
content <- lapply(tweet_data$result, function(x){x$content})
Basic processing of Topsy Otter API response:
library(RJSONIO)
library(RCurl)
tweet_data <- getURL("http://otter.topsy.com/search.json?q=PSN&mintime=1301634000&perpage=10&maxtime=1304226000&apikey=xxx")
#
# Addition to your code
#
tweets <- fromJSON(tweet_data)$response$list
content <- sapply(tweets, function(x) x$content)
trackback_date <- sapply(tweets, function(x) x$trackback_date)
EDIT: Processing multiple pages
Function gets 100 items from specified page:
# Fetch one page of search results (100 items per page) and return the
# element found under response$list in the parsed JSON.
#
# page: page number inserted into the request URL.
pagetweets <- function(page) {
  query_url <- paste0(
    "http://otter.topsy.com/search.json?q=PSN&mintime=1301634000&page=", page,
    "&perpage=100&maxtime=1304226000&apikey=xxx",
    collapse = ""
  )
  parsed <- fromJSON(getURL(query_url))
  parsed$response$list
}
Now we can apply it to multiple pages:
# Fetch pages 1 to 10 and remove only the outer (per-page) list level, so
# each element of 'tweets' is still one item. FALSE is spelled out because F
# is a reassignable variable.
tweets <- unlist(lapply(1:10, pagetweets), recursive = FALSE)
And, voila, this code:
content <- sapply(tweets, function(x) x$content)
trackback_date <- sapply(tweets, function(x) x$trackback_date)
returns you 1000 records.