Reading complex json data as dataframe in R

Reading complex json data as dataframe in R - json

I have the following json data:
json_data <- data.frame(changedContent=c('{"documents":[],"images":[],"profileCommunications":[],"shortListedProfiles":[],"matrimonyUser":{"createdBy":null,"parentMatrimonyUserId":0,"userSalutationVal":"Mr.","matrimonyUserCode":"173773","matrimonyUserName":"SUDIPTO DEB BARMAN","emailAddress":"sudipto06#yahoo.com","contactNumber":"9434944429","emailOTP":"","mobilePhoneOTP":"","isEmailOTPVerified":1,"isMobilePhoneOTPverified":1,"isHideContact":null,"isHideEmail":null,"lastLogInTime":null,"userSessionDtlId":null,"modifiedBy":4444,"modifiedOn":1440167133028,"isDeleted":null,"isActive":1,"isAllowedLogin":null,"numberOfChildProfile":null,"matrimonyUserTypeId":100000006,"matrimonyUserTypeVal":"Online Customer","onlineStatusFlag":null,"lastSystemTransactionDateTime":null,"isLive":null,"mobileCountryCode":0,"userStatusIdValue":"Registered and Verified","crmUserStatusIdValue":null,"deactivateReasonIdValue":null,"deactivateReason":null,"matrimonyUserId":165614,"userSalutationId":100001617,"userStatusId":100002760,"crmUserStatusId":null,"deactivateReasonId":null,"createdOn":null},"aboutMes":[],"partnerPreference":{"isSubcastDealbreaker":null,"isOccupationDealbreaker":null,"isIndustryDealbreaker":null,"isIncomeDealbreaker":null,"isHeightDealbreaker":null,"isBodyTypeDealbreaker":null,"isHivDealbreaker":null,"isFamilyTypeDealbreaker":null,"isFamilyIncomeDealbreaker":null,"isDrinkingDealbreaker":null,"locationTypeIds":null,"isLocationTypeDealbreaker":null,"isLocationNameDealbreaker":null,"locationNameOthers":"","isMaritalStatusDealbreaker":null,"isSmokingDealbreaker":null,"isFoodHabitsDealbreaker":null,"isGothraDealbreaker":null,"isManglikDealbreaker":null,"isProfileCreatedbyDealbreaker":null,"religionIdsValues":"","casteIdsValues":null,"motherTongueIdsValues":"","minimumEducationValues":"","occupationIdsValues":"","industryIdsValues":"","bodyTypeIdsValues":"","hivIdValue":null,"familyTypeIdsValues":"","familyIncomeValues":"","drinkingIdValues":"","locationNameIdsValues":null,"maritalStatusIdsValues":"","smokingIdsValues":"","foodHabitsIdsValues":"","gothraIdsValues":"","manglikIdValue":null,"profileCreatedbyValues":"","heightFrom":null,"heightTo":null,"createdBy":4444,"userSessionDtlId":null,"modifiedBy":4444,"modifiedOn":1440167133115,"isDeleted":null,"isActive":1,"partnerPreferenceId":2757,"isReligionDealbreaker":null,"casteIds":null,"isCasteDealbreaker":null,"isMotherTongueDealbreaker":null,"subcaste":"","religionIds":null,"motherTongueIds":null,"minimumEducation":null,"occupationIds":null,"industryIds":null,"bodyTypeIds":null,"income":null,"incomeValues":"","familyIncome":null,"hivId":0,"familyTypeIds":null,"drinkingId":null,"locationNameIds":"","maritalStatusIds":null,"smokingIds":null,"foodHabitsIds":null,"gothraIds":null,"manglikId":0,"profileCreatedby":null,"adbCount":0,"fifCount":0,"ageFrom":null,"ageTo":null,"isAgeDealbreaker":null,"isminimumEducationDealbreaker":null,"userId":165614,"createdOn":1440167133115,"height":null},"profileAgentDtl":{"campaignId":"","acquirerCode":0,"createdBy":4444,"modifiedBy":4444,"modifiedOn":1440167133110,"isDeleted":null,"isActive":1,"relationshipMangerId":0,"sourceCode":100000004,"userId":165614,"createdOn":1440167133110,"idOdNo":"","relationshipMangerName":null,"relationshipMangerContact":"","profileAgentDtlId":2757,"dateOfEntry":1437935400000,"formSerialNo":"3661","sourceCodeVal":null,"agentCode":null,"acquirerCodeVal":null,"agentName":"","agentMobileNo":"","adBookingNo":""},"profileBasicRegistrationDtl":{"sourceId":null,"createdBy":4444,"userSessionDtlId":null,"modifiedBy":4444,"modifiedOn":1440167133109,"isDeleted":null,"isActive":1,"genderId":100000596,"priorityId":100001671,"profileCreatedById":100000590,"webSourceId":100001672,"dob":null,"genderVal":"Male","userId":165614,"profileCompleteness":null,"createdOn":1440167133109,"profileDtlId":2757,"nickName":null,"relation":null,"regViewersCount":null,"guestViewersCount":null,"trustScore":20,"webSourceVal":"Newspaper ","priorityVal":"Medium","profileCreatedByval":"Self","fieldContentModerationStatusId":null,"photoModerationStatusId":null,"documentModerationStatusId":null,"isPhotoHide":null,"isHoroscopeHide":null},"profileAstrologyDtl":{"createdBy":4444,"userSessionDtlId":null,"modifiedBy":4444,"modifiedOn":1440167133111,"isDeleted":null,"isActive":1,"userId":165614,"createdOn":1440167133111,"profileAstrologyDtlId":2757,"gothraId":0,"gaanId":0,"nakshatraId":0,"sunSignId":0,"moonSignId":0,"manglikFlagId":0,"placeOfBirth":"0","timeOfBirth":null,"isPreferredPartnerDtl":null,"gothraVal":"","gaanVal":"","nakshatraVal":"","sunSignVal":"","moonSignVal":"","manglikFlagVal":""},"profileFamilyDtl":{"permanentAddress":null,"createdBy":4444,"userSessionDtlId":null,"modifiedBy":4444,"modifiedOn":1440167133111,"isDeleted":null,"isActive":1,"familyIncome":0.0,"fathersStatusId":0,"mothersStatusId":0,"fathersOccupationId":0,"mothersOccupationId":0,"mothersIndustryId":null,"fathersIndustryId":null,"familyTypeId":0,"familyValueId":0,"familyKindId":0,"familyStatusId":0,"userId":165614,"createdOn":1440167133111,"moderatedOn":null,"profileFamilyDtlId":2757,"fathersName":"","fathersStatusVal":null,"motherName":"","mothersStatusVal":null,"numberOfSibling":0,"shortRefModerationStatus":null,"fathersOccupationVal":null,"mothersOccupationVal":null,"familyTypeVal":null,"familyValueVal":null,"familyKindVal":null,"familyStatusVal":null,"mothersIndustryVal":null,"fathersIndustryVal":null,"familyIncomeVal":"","moderatedBy":null,"moderatorRemarks":null,"ref1fullName":null,"ref1relationship":null,"ref1emailId":null,"ref1phoneNo":null,"ref1remarks":null,"ref2fullName":null,"ref2relationship":null,"ref2emailId":null,"ref2phoneNo":null,"ref2remarks":null},"profileLifestyleDtl":{"createdBy":4444,"userSessionDtlId":null,"modifiedBy":4444,"modifiedOn":1440167133110,"isDeleted":null,"isActive":1,"favouriteBooksTypeIds":null,"favouriteHobbiesTypeIds":null,"favouriteMoviesTypeIds":null,"favouriteMusicTypeIds":null,"favouriteSportsTypeIds":null,"livingInHouseTypeId":0,"vehicleTypeOwnedId":0,"petsId":0,"drinkingStatusId":0,"numberOfKids":0,"userId":165614,"createdOn":1440167133110,"moderatedOn":null,"moderatedBy":null,"isModerated":null,"moderatorRemarks":null,"profileLifestyleDtlId":2757,"smokingStatusId":0,"foodHabitsId":0,"financialPlansId":0,"retirementPlansId":0,"vehicleDescription":null,"vehicleNumber":0,"childrenDesiredId":null,"isReligionImportantFlagId":null,"religiousBeliefs":0,"smokingStatusVal":"","drinkingStatusVal":null,"foodHabitsVal":"","financialPlansVal":null,"retirementPlansVal":null,"vehicleTypeOwnedVal":null,"livingInHouseTypeVal":null,"petsVal":null,"childrenDesiredVal":null,"favouriteBooksTypeVals":"","favouriteMoviesTypeVals":"","favouriteMusicTypeVals":"","favouriteSportsTypeVals":"","favouriteHobbiesTypeVals":"","isReligionImportantFlagVal":null,"religiousBeliefsVal":"","favouriteHobbiesRating":null,"favouriteHobbiesDescription":null,"noOfKidsVal":null},"profileOccupationEducationDtl":{"highestSpecializationVal":null,"highestSpecializationOthersVal":"","createdBy":4444,"userSessionDtlId":null,"modifiedBy":4444,"modifiedOn":1440167133110,"isDeleted":null,"isActive":1,"highestEducationId":null,"occupationId":null,"designationId":null,"incomeCurrencyId":null,"education2id":0,"education3id":0,"specialization2id":0,"specialization3id":0,"highestSpecializationId":null,"industryId":null,"annualIncome":null,"userId":165614,"createdOn":1440167133110,"moderatedOn":null,"moderatedBy":null,"isModerated":null,"moderatorRemarks":null,"highestEducationVal":null,"occupationVal":null,"industryVal":null,"incomeCurrencyVal":null,"designationVal":null,"education3val":null,"education2val":null,"specialization2val":null,"specialization2othersVal":"","specialization3val":null,"specialization3othersVal":"","additionalQualification":null,"professionalQualification":null,"occupationOthersVal":"","departmentId":null,"employmentSectorId":null,"companyName":"","highestEducationInstituteVal":null,"education2instituteVal":"0","education3instituteVal":"","professionalQualificationVal":null,"departmentVal":null,"employmentSectorVal":null,"annualIncomeVal":null,"profileOccupationEducationDtlId":2757,"schoolName2":"","schoolName1":"","education2instituteId":null,"education3instituteId":null},"profilePersonalDtl":{"createdBy":4444,"userSessionDtlId":null,"modifiedBy":4444,"modifiedOn":1440167133110,"isDeleted":null,"isActive":1,"familyOriginId":0,"stateId":null,"countryId":null,"numChildrenProspect":0,"countryVal":null,"stateVal":null,"landmark":null,"locationVal":null,"userId":165614,"locationId":null,"religionId":100000598,"createdOn":1440167133110,"isPreferredPartnerDtl":null,"maritalStatusId":null,"maritalStatusVal":null,"subCaste":"","profilePersonalDtlId":2757,"motherTongueId":100000618,"casteId":null,"marryOutsideCasteId":0,"familyOriginVal":null,"facebookHandle":"","linkedInHandle":"","twiterHandle":null,"googlePlus":null,"casteText":"Kshatriya","homeTownText":"0","religionVal":"Hindu","motherTongueVal":"Bengali","marryOutsideCasteVal":"","isSocialMediaVerified":null,"numChildrenProspectVal":null,"locality":null},"profilePhysicalAttributesDtl":{"createdBy":4444,"userSessionDtlId":null,"modifiedBy":4444,"modifiedOn":1440167133110,"isDeleted":null,"isActive":1,"hivId":0,"bodyTypeId":0,"complexionId":0,"bloodGroupId":0,"userId":165614,"createdOn":1440167133110,"height":null,"isPreferredPartnerDtl":null,"hairColourId":0,"eyeColourId":0,"hairLengthId":0,"physicalStatusId":null,"disabilitiesVal":"","hivVal":"","knownAilmentVal":"","bodyTypeVal":null,"complexionVal":null,"hairColourVal":"","eyeColourVal":"","hairLengthVal":"","physicalStatusVal":null,"bloodGroupVal":null,"profilePhysicalAttributesDtlId":2757,"weight":null},"profileSiblingsDtl":null,"profileImageDtl":null,"notes":[{"createdBy":4444,"modifiedBy":4444,"modifiedOn":1440167133115,"isDeleted":null,"isActive":1,"userId":165614,"createdOn":1440167133115,"profileNotesDtlId":3499,"notesDescription":""}],"references":[],"relationOthers":[],"photoIdentificationDetails":null,"preModAboutMes":[{"answer":"null ","preModerationAboutMeId":1439283144614540579,"moderationStatus":1,"createdBy":4444,"questionVal":null,"userSessionDtlId":null,"modifiedBy":4444,"modifiedOn":1440167133092,"isActive":1,"isAnswerChange":0,"userId":165614,"questionId":1,"createdOn":1440167133092},{"answer":"null ","preModerationAboutMeId":1439283144614540580,"moderationStatus":1,"createdBy":4444,"questionVal":null,"userSessionDtlId":null,"modifiedBy":4444,"modifiedOn":1440167133093,"isActive":1,"isAnswerChange":0,"userId":165614,"questionId":2,"createdOn":1440167133093},{"answer":"null ","preModerationAboutMeId":1439283144614540581,"moderationStatus":1,"createdBy":4444,"questionVal":null,"userSessionDtlId":null,"modifiedBy":4444,"modifiedOn":1440167133094,"isActive":1,"isAnswerChange":0,"userId":165614,"questionId":3,"createdOn":1440167133094},{"answer":"null ","preModerationAboutMeId":1439283144614540582,"moderationStatus":1,"createdBy":4444,"questionVal":null,"userSessionDtlId":null,"modifiedBy":4444,"modifiedOn":1440167133094,"isActive":1,"isAnswerChange":0,"userId":165614,"questionId":4,"createdOn":1440167133094}],"preModContent":[{"preModerationContentId":1439307323336466240,"isChangeMatrimonyUserName":null,"isChangeLocality":0,"isChangeLandmark":0,"permanentAddress":"Dev Barman,Mayapur,PO-Talbagicha,Kharadpur-721306","isChangePermanentAddress":1,"nameOfInstitutionHighestEducation":"0","highestSpecializationVal":null,"highestSpecializationOthersVal":null,"createdBy":4444,"matrimonyUserName":"SUDIPTO DEB BARMAN","userSessionDtlId":null,"modifiedBy":4444,"modifiedOn":1440167133104,"isDeleted":null,"isActive":1,"highestEducationId":0,"occupationId":0,"designationId":0,"incomeCurrencyId":null,"highestSpecializationId":0,"industryId":0,"annualIncome":0.0,"stateId":100000269,"countryId":100000101,"dob":520972200000,"countryVal":null,"stateVal":null,"landmark":"","userId":165614,"moderationStatusId":null,"createdOn":1440167133104,"isChangeNameOfInstitutionHighestEducation":0,"isChangeHighestSpecialization":0,"highestEducationVal":null,"isChangeHighestEducation":0,"occupationVal":null,"isChangeOccupation":0,"industryVal":"","isChangeIndustry":0,"incomeCurrencyVal":null,"isChangeIncomeCurrency":0,"customerTypeId":null,"customerTypeVal":null,"isChangeCustomerType":1,"isChangeDob":1,"maritalStatusId":100000900,"maritalStatusVal":"Never Married","isChangeMaritalStatus":1,"isChangeCountry":1,"isChangeState":1,"cityId":0,"isChangeCity":0,"cityVal":null,"isChangeAnnualIncome":0,"designationVal":null,"isChangeDesignation":0,"subCaste":null,"hometown":null,"isChangeSubCaste":null,"isChangeHometown":null,"ref1fullName":null,"isChangeRef1fullName":null,"ref1relationship":null,"isChangeRef1relationship":null,"ref1emailId":null,"isChangeRef1emailId":null,"ref1phoneNo":null,"isChangeRef1phoneNo":null,"ref1remarks":null,"isChangeRef1remarks":null,"ref2fullName":null,"isChangeRef2fullName":null,"ref2relationship":null,"isChangeRef2relationship":null,"ref2emailId":null,"isChangeRef2emailId":null,"ref2phoneNo":null,"isChangeRef2phoneNo":null,"ref2remarks":null,"isChangeRef2remarks":null,"typeOfCustomer":null,"isChangeTypeOfCustomer":null,"highestEducationInstituteId":null,"typeOfCustomerId":100000006,"locality":""}],"preModReferences":[],"preModShortReferences":[{"moderationStatus":null,"createdBy":4444,"userSessionDtlId":null,"modifiedBy":4444,"modifiedOn":1440167133099,"isDeleted":null,"isActive":1,"userId":165614,"createdOn":1440167133099,"isModerated":null,"premoderationprofileImageDtlId":1772,"ref1fullName":"","isChangeRef1fullName":0,"ref1relationship":"","isChangeRef1relationship":0,"ref1emailId":"","isChangeRef1emailId":0,"ref1phoneNo":null,"isChangeRef1phoneNo":0,"ref1remarks":null,"ref2fullName":"","isChangeRef2fullName":0,"ref2relationship":"","isChangeRef2relationship":0,"ref2emailId":"","isChangeRef2emailId":0,"ref2phoneNo":null,"isChangeRef2phoneNo":0,"ref2remarks":null}],"paymentTransactions":[],"userPlanMappings":[],"userFeatureMappings":[],"userPlanMapping":null,"blockedProfiles":[],"notMyTypeProfiles":[]}')
I want to convert the above to a convenient data frame with 1 row each MatrimonyUserId in the above.I have tried a few things but unable to get this in desired format.

Assuming you can wrangle the json data into a nested list....
x <- jsonlite::fromJSON(jsontext)
I've found it's easiest to parse complex list structures by using the pipe operator and frequently checking the structure (limited to 1 or 2 levels.
str1 <- function(x) str(x, 1)
str2 <- function(x) str(x, 2)
# for pipe operator
library("magittr")
x %>% str1
x %>% .[[1]] %>% str2
Etc.

Related

R highcharter get data from plots saved as html

I plot data with highcharter package in R, and save them as html to keep interactive features. In most cases I plot more than one graph, therefore bring them together as a canvas.
require(highcharter)
hc_list <- lapply(list(sin,cos,tan,tanh),mapply,seq(1,5,by = 0.1)) %>%
lapply(function(x) highchart() %>% hc_add_series(x))
hc_grid <- hw_grid(hc_list,ncol = 2)
htmltools::browsable(hc_grid) # print
htmltools::save_html(hc_grid,"test_grid.html") # save
I want to extract the data from plots that I have saved as html in the past, just like these. Normally I would do hc_list[[1]]$x$hc_opts$series, but when I import html into R and try to do the same, I get an error. It won't do the job.
> hc_imported <- htmltools::includeHTML("test_grid.html")
> hc_imported[[1]]$x$hc_opts$series
Error in hc_imported$x : $ operator is invalid for atomic vectors
If I would be able to write a function like
get_my_data(my_imported_highcharter,3) # get data from 3rd plot
it would be the best. Regards.

You can use below code
require(highcharter)
hc_list <- lapply(list(sin,cos,tan,tanh),mapply,seq(1,5,by = 0.1)) %>%
lapply(function(x) highchart() %>% hc_add_series(x))
hc_grid <- hw_grid(hc_list,ncol = 2)
htmltools::browsable(hc_grid) # print
htmltools::save_html(hc_grid,"test_grid.html") # save
# hc_imported <- htmltools::includeHTML("test_grid.html")
# hc_imported[[1]]$x$hc_opts$series
library(jsonlite)
library(RCurl)
library(XML)
get_my_data<-function(my_imported_highcharter,n){
webpage <- readLines(my_imported_highcharter)
pagetree <- htmlTreeParse(webpage, error=function(...){})
body <- pagetree$children$html$children$body
divbodyContent <- body$children$div$children[[n]]
script<-divbodyContent$children[[2]]
data<-as.character(script$children[[1]])[6]
data<-fromJSON(data,simplifyVector = FALSE)
data<-data$x$hc_opts$series[[1]]$data
return(data)
}
get_my_data("test_grid.html",3)
get_my_data("test_grid.html",1)

Creating Dataframe from a json file

I want to create a proper data frame reading from a json file. I am able to view the created data frame properly, but dplyr function group_by does not work on it. It is probably because when I do the str() of the data frame created it gives every column as a list of strings as opposed to a vector of strings. I am trying the following:
require(jsonlite)
train_file = 'train.json'
train_data <- fromJSON(train_file)
rb = data.frame(sapply(train_data,c), stringsAsFactors = FALSE)
rbs = rb %>% slice(1:10)
rbsg = rbs %>%
group_by(colname)
This gives the following error:
Error: cannot group column colname, of class 'list'
Very specifically, the file that I am trying to read is the train.json file in this kaggle competition:
https://www.kaggle.com/c/two-sigma-connect-rental-listing-inquiries/data

You need to unnest() the column of interest before operating on it (e.g. before using group_by() or other dplyr verbs):
library(jsonlite)
library(tidyverse)
rbs <- fromJSON("train.json") %>%
bind_rows()
rbsg <- rbs %>%
unnest(bedrooms) %>%
group_by(bedrooms)
rbs_filtered <- rbs %>%
unnest(bathrooms) %>%
filter(bathrooms > 5)

jsonlite's fromJSON is returning a list of 2 lists instead of a df

I'm following the FCC's documentation to download some metadata about proceedings.
I don't believe I can post the data but you can get a free API key.
My code results in a listed list of 2 lists instead of a structured df from the JSON format.
My goal is to have a dataframe where each json element is it's own column.. like a normal df.
library(httr)
library(jsonlite)
datahere = "C:/fcc/"
setwd(datahere)
URL <- "https://publicapi.fcc.gov/ecfs/filings?api_key=<KEY HERE>&proceedings.name=14-28&sort=date_disseminated,DESC"
dataDF <- GET(URL)
dataJSON <- content(dataDF, as="text")
dataJSON <- fromJSON(dataJSON)
# NAs
dataJSON2 <- lapply(dataJSON, function(x) {
x[sapply(x, is.null)] <- NA
unlist(x)
})
x <- do.call("rbind", dataJSON2)
x <- as.data.frame(x)

The JSON is really deeply nested, so you need to put a little more thought into converting between list and data.frame. The logic below pulls out a data.frame of 25 filings (102 variables) and 10 aggregations (25 variables).
# tackle the filings object
filings_df <- ldply(dataJSON$filings, function(x) {
# removes null list elements
x[sapply(x, is.null)] <- NA
# converts to a named character vector
unlisted_x <- unlist(x)
# converts the named character vector to data.frame
# with 1 column and rows for each element
d <- as.data.frame(unlisted_x)
# we need to transpose this data.frame because
# the rows should be columns, and don't check names when converting
d <- as.data.frame(t(d), check.names=F)
# now assign the actual names based on that original
# unlisted character vector
colnames(d) <- names(unlisted_x)
# now return to ldply function, which will automatically stack them together
return(d)
})
# tackle the aggregations object
# same exact logic to create the data.frame
aggregations_df <- ldply(dataJSON$aggregations, function(x) {
# removes null list elements
x[sapply(x, is.null)] <- NA
# converts to a named character vector
unlisted_x <- unlist(x)
# converts the named character vector to data.frame
# with 1 column and rows for each element
d <- as.data.frame(unlisted_x)
# we need to transpose this data.frame because
# the rows should be columns, and don't check names when converting
d <- as.data.frame(t(d), check.names=F)
# now assign the actual names based on that original
# unlisted character vector
colnames(d) <- names(unlisted_x)
# now return to ldply function, which will automatically stack them together
return(d)
})

How to read nested JSON structure?

I have some JSON that looks like this:
"total_rows":141,"offset":0,"rows":[
{"id":"1","key":"a","value":{"SP$Sale_Price":"240000","CONTRACTDATE$Contract_Date":"2006-10-26T05:00:00"}},
{"id":"2","key":"b","value":{"SP$Sale_Price":"2000000","CONTRACTDATE$Contract_Date":"2006-08-22T05:00:00"}},
{"id":"3","key":"c","value":{"SP$Sale_Price":"780000","CONTRACTDATE$Contract_Date":"2007-01-18T06:00:00"}},
...
In R, what would be the easiest way to produce a scatter-plot of SP$Sale_Price versus CONTRACTDATE$Contract_Date?
I got this far:
install.packages("rjson")
library("rjson")
json_file <- "http://localhost:5984/testdb/_design/sold/_view/sold?limit=100"
json_data <- fromJSON(file=json_file)
install.packages("plyr")
library(plyr)
asFrame <- do.call("rbind.fill", lapply(json_data, as.data.frame))
but now I'm stuck...
> plot(CONTRACTDATE$Contract_Date, SP$Sale_Price)
Error in plot(CONTRACTDATE$Contract_Date, SP$Sale_Price) :
object 'CONTRACTDATE' not found
How to make this work?

Suppose you have the following JSON-file:
txt <- '{"total_rows":141,"offset":0,"rows":[
{"id":"1","key":"a","value":{"SP$Sale_Price":"240000","CONTRACTDATE$Contract_Date":"2006-10-26T05:00:00"}},
{"id":"2","key":"b","value":{"SP$Sale_Price":"2000000","CONTRACTDATE$Contract_Date":"2006-08-22T05:00:00"}},
{"id":"3","key":"c","value":{"SP$Sale_Price":"780000","CONTRACTDATE$Contract_Date":"2007-01-18T06:00:00"}}]}'
Then you can read it as follows with the jsonlite package:
library(jsonlite)
json_data <- fromJSON(txt, flatten = TRUE)
# get the needed dataframe
dat <- json_data$rows
# set convenient names for the columns
# this step is optional, it just gives you nicer columnnames
names(dat) <- c("id","key","sale_price","contract_date")
# convert the 'contract_date' column to a datetime format
dat$contract_date <- strptime(dat$contract_date, format="%Y-%m-%dT%H:%M:%S", tz="GMT")
Now you can plot:
plot(dat$contract_date, dat$sale_price)
Which gives:
If you choose not to flatten the JSON, you can do:
json_data <- fromJSON(txt)
dat <- json_data$rows$value
sp <- strtoi(dat$`SP$Sale_Price`)
cd <- strptime(dat$`CONTRACTDATE$Contract_Date`, format="%Y-%m-%dT%H:%M:%S", tz="GMT")
plot(cd,sp)
Which gives the same plot:

I found a way that doesn't discard the field names:
install.packages("jsonlite")
install.packages("curl")
json <- fromJSON(json_file)
r <- json$rows
At this point r looks like this:
> class(r)
[1] "data.frame"
> colnames(r)
[1] "id" "key" "value"
After some more Googling and trial-and-error I landed on this:
f <- r$value
sp <- strtoi(f[["SP$Sale_Price"]])
cd <- strptime(f[["CONTRACTDATE$Contract_Date"]], format="%Y-%m-%dT%H:%M:%S", tz="GMT")
plot(cd,sp)
And the result on my full data-set...

JSON to R for Data Mining

I am trying to grab tweets using the Topsy Otter api, so I can perform some data mining on it for my dissertation.
So far, I have got:
library(RJSONIO)
library(RCurl)
tweet_data <- getURL("http://otter.topsy.com/search.json?q=PSN&mintime=1301634000&perpage=10&maxtime=1304226000&apikey=xxx")
fromJSON(tweet_data)
Which works fine. Now however, I want to return just a couple details from this file, 'content' and 'trackback_date'. I cannot seem to figure out how - I have tried cobbling a couple of examples together, but unable to extract what I want.
Here is what I've tried so far:
trackback_date <- lapply(tweet_data$result, function(x){x$trackback_date})
content <- lapply(tweet_data$result, function(x){x$content})
Any help would be greatly appreciated, thank you.
edit
I have also tried:
library("rjson")
# use rjson
tweet_data <- fromJSON(paste(readLines("http://otter.topsy.com/search.json?q=PSN&mintime=1301634000&perpage=10&maxtime=1304226000&apikey=xxx"), collapse=""))
# get a data from Topsy Otter API
# convert JSON data into R object using fromJSON()
trackback_date <- lapply(tweet_data$result, function(x){x$trackback_date})
content <- lapply(tweet_data$result, function(x){x$content})

Basic processing of Topsy Otter API response:
library(RJSONIO)
library(RCurl)
tweet_data <- getURL("http://otter.topsy.com/search.json?q=PSN&mintime=1301634000&perpage=10&maxtime=1304226000&apikey=xxx")
#
# Addition to your code
#
tweets <- fromJSON(tweet_data)$response$list
content <- sapply(tweets, function(x) x$content)
trackback_date <- sapply(tweets, function(x) x$trackback_date)
EDIT: Processing multiple pages
Function gets 100 items from specified page:
pagetweets <- function(page){
url <- paste("http://otter.topsy.com/search.json?q=PSN&mintime=1301634000&page=",page,
"&perpage=100&maxtime=1304226000&apikey=xxx",
collapse="", sep="")
tweet_data <- getURL(url)
fromJSON(tweet_data)$response$list
}
Now we can apply it to multiple pages:
tweets <- unlist(lapply(1:10, pagetweets), recursive=F)
And, voila, this code:
content <- sapply(tweets, function(x) x$content)
trackback_date <- sapply(tweets, function(x) x$trackback_date)
returns you 1000 records.

We Keep Coding

html mysql json google-apps-script actionscript-3 ms-access google-chrome google-maps reporting-services sql-server-2008

Reading complex json data as dataframe in R - json

Related

R highcharter get data from plots saved as html

Creating Dataframe from a json file

jsonlite's fromJSON is returning a list of 2 lists instead of a df

How to read nested JSON structure?

JSON to R for Data Mining

Categories

Resources