Nested JSON data to CSV - json
I am trying to convert JSON from a URL to CSV/data.frame in R. The data is at: Data_Kayak and pasted below.
So far, I have tried newJSONParser() and fromJSON() from library(rjson). I managed to generate a list, but that is the best I've done so far. I need the data in CSV form so I can analyze and plot it.
{"origin":{"latitude":37.25,"name":"Springfield, MO","shortName":"SGF","longitude":-93.38333333},"destinations":[{"country":{"latitude":40,"name":"United States","shortName":"US","longitude":-96.5},"flight":359,"temp":[[18,43],[21,46],[28,54],[36,61],[45,72],[54,82],[59,88],[57,86],[48,77],[37,64],[25,52],[18,45]],"flightInfo":{"price":359},"languages":["en"],"city":{"latitude":39.73917,"name":"Denver","id":"12493","longitude":-104.98417},"shortDateRange":"2/28 - 3/8","airport":{"latitude":39.86310781,"popularity":40614,"name":"Denver, CO","shortName":"DEN","longitude":-104.6748161},"flightMaxDuration":135,"activities":["ski","gambling","golf"],"days":8,"region":{"latitude":39.149496257664,"name":"Colorado","shortName":"CO","longitude":-104.94804819074001},"depart":"20150228","airline":"United","return":"20150308","flightMaxStops":0},{"country":{"latitude":40,"name":"United States","shortName":"US","longitude":-96.5},"flight":279,"temp":[[16,32],[19,36],[28,46],[39,59],[48,70],[57,79],[63,84],[63,82],[54,75],[43,63],[32,48],[21,36]],"flightInfo":{"price":279},"languages":["en"],"city":{"latitude":41.88451,"name":"Chicago","id":"12514","longitude":-87.63095},"shortDateRange":"2/19 - 2/28","airport":{"latitude":41.98333333,"popularity":32415,"name":"Chicago, IL","shortName":"ORD","longitude":-87.9},"flightMaxDuration":90,"activities":["ski","gambling","golf"],"days":9,"region":{"latitude":40.268763621091,"name":"Illinois","shortName":"IL","longitude":-89.062810958234},"depart":"20150219","airline":"American Airlines","return":"20150228","flightMaxStops":0},{"country":{"latitude":40,"name":"United States","shortName":"US","longitude":-96.5},"flight":379,"temp":[[34,52],[37,57],[45,66],[52,73],[61,81],[68,86],[72,90],[70,88],[64,82],[54,73],[43,64],[36,55]],"flightInfo":{"price":379},"languages":["en"],"city":{"latitude":33.74889,"name":"Atlanta","id":"11123","longitude":-84.38806},"shortDateRange":"2/27 - 
3/1","airport":{"latitude":33.65,"popularity":30080,"name":"Atlanta, GA","shortName":"ATL","longitude":-84.43333333},"flightMaxDuration":120,"activities":["golf"],"days":2,"region":{"latitude":32.929258844272,"name":"Georgia","shortName":"GA","longitude":-83.621524741888},"depart":"20150227","airline":"Delta","return":"20150301","flightMaxStops":0},{"country":{"latitude":40,"name":"United States","shortName":"US","longitude":-96.5},"flight":279,"temp":[[36,55],[39,61],[48,68],[55,77],[64,84],[72,91],[75,97],[75,95],[68,88],[57,79],[46,66],[37,57]],"flightInfo":{"price":279},"languages":["en"],"city":{"latitude":32.78333,"name":"Dallas","id":"16406","longitude":-96.8},"shortDateRange":"2/25 - 3/3","airport":{"latitude":32.89710131,"popularity":24500,"name":"Dallas, TX","shortName":"DFW","longitude":-97.04386711},"flightMaxDuration":90,"activities":["golf"],"days":6,"region":{"latitude":31.344670987137,"name":"Texas","shortName":"TX","longitude":-97.666193354857},"depart":"20150225","airline":"American Airlines","return":"20150303","flightMaxStops":0}]}
jsonlite did the trick
# Fetch the Kayak "explore" JSON for one airport and turn it into a data frame.
#
# Packages are loaded from the default library search path so the script is
# not tied to one machine's per-user library directory, and the working
# directory is left untouched: nothing below reads or writes a relative path.
library(jsonlite)
library(RCurl)

# Endpoint queried with airport=SGF and flightMaxStops=0.
url <- "http://www.kayak.com/h/explore/api?airport=SGF&v=1&flightMaxStops=0"

# Download the response body as text.
kkjson <- getURLContent(url)

# Parse the JSON text into R objects (lists / data frames).
kk_txt_from_json <- fromJSON(kkjson)

# Combine the parsed pieces into a single data frame.
# NOTE(review): the payload has nested objects and arrays (e.g. "temp",
# "activities"); if nested columns remain, jsonlite::flatten() may be needed
# before write.csv() -- confirm against the actual response.
kk_table <- data.frame(kk_txt_from_json)
Related
Edit multiple JSON files in R and saving them to CSV (Alternative for "for" loop)
I have multiple JSON files containing Tweets from Twitter. I want to import and edit them in R one by one. For a single file my code looks like this: data <- fromJSON("filename.json") data <- data[c(1:3,13,14)] data$lang <- ifelse(data$lang!="de",NA,data$lang) data <- na.omit(data) write_as_csv(data,"filename.csv") Now I want to apply this code to multiple files. I found a "for" loop code here: Loop in R to read many files Applied to my problem it should look something like this: setwd("~/Documents/Elections") ldf <- list() listjson <- dir(pattern = "*.json") for (k in 1:length(listjson)){ data[k] <- fromJSON(listjson[k]) data[k] <- data[k][c(1:3,13,14)] data[k]$lang <- ifelse(data[k]$lang!="de",NA,data[k]$lang) data[k] <- na.omit(data[k]) filename <- paste(k, ".csv") write_as_csv(listjson[k],filename) } But the first line in the loop already doesn't work. > data[k] <- fromJSON(listjson[k]) Warning message: In `[<-.data.frame`(`*tmp*`, k, value = list(createdAt = c(1505935036000, : provided 35 variables to replace 1 variables I can't figure out why. Also, I wonder if there is a nicer way to realize this problem without using a for loop. I read about the apply family, I just don't know how to apply it to my problem. Thanks in advance! This is an example how my data looks: https://drive.google.com/file/d/19cRS6p_mHbO6XXprfvc6NPZWuf_zG7jr/view?usp=sharing
It should work like this: setwd("~/Documents/Elections") listjson <- dir(pattern = "*.json") for (k in 1:length(listjson)){ # Load the JSON that correspond to the k element in your list of files data <- fromJSON(listjson[k]) # Select relevant columns from the dataframe data <- data[,c(1:3,13,14)] # Manipulate data data$lang <- ifelse(data$lang!="de",NA,data$lang) data <- na.omit(data) filename <- paste(listjson[k], ".csv") write_as_csv(data,filename) } For the second part of the question, apply applies a function over rows or columns of a dataframe. This is not your case, as you are looping through a vector of character to get filenames to be used somewhere else.
SDMX to dataframe with RSDMX in R
I'm trying to get data from the Lithuanian Statistics Department. They offer an SDMX API with either XML or JSON (LSD). The example XML shown is: https://osp-rs.stat.gov.lt/rest_xml/data/S3R629_M3010217 which downloads the XML file. I tried the following: devtools::install_github("opensdmx/rsdmx") library(rsdmx) string <- "https://osp-rs.stat.gov.lt/rest_xml/data/S3R629_M3010217" medianage <- readSDMX(string) which results in the error: <simpleError in doTryCatch(return(expr), name, parentenv, handler): Invalid SDMX-ML file> I also tried simply reading in the manually downloaded file: devtools::install_github("opensdmx/rsdmx") library(rsdmx) medianage <- readSDMX(file="rest_data_M3010217_20180116163251.xml" , isURL = FALSE) medianage <- as.data.frame(medianage) which results in medianage being NULL (empty). Maybe someone has an idea how I could download and transform the data from LSD using either: https://osp-rs.stat.gov.lt/rest_xml/data/S3R629_M3010217 https://osp-rs.stat.gov.lt/rest_json/data/S3R629_M3010217 Thanks a lot!
In order to use rsdmx for this data source, some enhancements have been added (see details at https://github.com/opensdmx/rsdmx/issues/141). You will need to re-install rsdmx from GitHub (version 0.5-11). You can then use the URL of the SDMX-ML file: library(rsdmx) url <- "https://osp-rs.stat.gov.lt/rest_xml/data/S3R629_M3010217" medianage <- readSDMX(url) df <- as.data.frame(medianage) A connector has also been added in rsdmx to facilitate data queries on the LSD (Lithuanian Statistics Department) SDMX endpoint. See below for an example of how to use it. sdmx <- readSDMX(providerId = "LSD", resource = "data", flowRef = "S3R629_M3010217", dsd = TRUE) df <- as.data.frame(sdmx, labels = TRUE) The above example shows how to enrich the data.frame with code labels extracted from the SDMX Data Structure Definition (DSD). For this, specify dsd = TRUE with readSDMX. This then allows you to use labels = TRUE when converting to a data.frame. For filtering data with readSDMX (e.g. startPeriod, endPeriod, code filters), check this page: https://github.com/opensdmx/rsdmx/wiki#readsdmx-as-helper-function
JSON to data frame in R
I am trying to turn a .json file into a data frame for data visualization. If I run the code below I get picture 1. library(jsonlite) jdata <- fromJSON("test.json") data <- as.data.frame(jdata) And when I try to transpose it, I get picture 2. data2 <- as.data.frame(t(data)) This is what the raw JSON looks like: I don't understand why column one has no name or is not part of the data frame (is jsonlite assuming these are titles?). How can I overcome this? I need a data frame from the JSON files: Column1 (with the dates) | Column2 (I will divide it into values and coordinates)
Try this for the input file test.json library(jsonlite) jdata <- read_json("test.json", simplifyVector = TRUE)
how to make a list from json string
I have a JSON file "adjFloatTest.data". In R, I read the field "Volume" from that file using the following code: json <- fromJSON("adjFloatTest.data") volume <- json$volume The value of volume is as follows: > volume $AAPL $AAPL[[1]] 1980-12-12 16751200 $AAPL[[2]] 1980-12-15 100424081 $AAPL[[3]] 1980-12-16 0.1177374 $AAPL[[4]] 1980-12-17 7164476 $AAPL[[5]] 1980-12-18 5364366 Each element corresponds to a company, date, and value. I want to store all of the dates in a list. How can I do this?
This will give you the list of dates sapply(volume,names)
The following should work: sapply(volume, function(x) lapply(x, "[[", 1)) but a reproducible example that could be copied and pasted would be helpful. If the above doesn't work, please use something like dput(volume[1:2]) to offer some workable sample data.
Using \Sexpr{} in LaTeX tabular environment
I am trying to use \Sexpr{} to include values from my R objects in a LaTeX table. I am essentially trying to replicate the summary output of a lm object in R because xtable's built in methods xtable.lm and xtable.summary.lm don't seem to include the Fstats, adjusted R-squared, etc (all the stuff at the bottom of the summary printout of the lm object in R console) So I tried accomplishing this by building a matrix to replicate the xtable.summary.lm output then construct a data frame of the relevant info for the extra stuff so I can refer to the values using \Sexpr{}. I tried doing this by using add.to.row to append the \multicolumn{} command in order to merge all columns of the last row of the LaTeX table and then just pass all the information I need into that cell of the table. The problem is that I get an "Undefined control sequence" for the \Sexpr{} expression in the \multicolumn{} expression. Are these two not compatible? If so, what am I doing wrong and if not does anyone know how to do what I am trying to do? Thanks, Here is the relevant part of my code: <<Test, results=tex>>= model1 <- lm(stndfnl ~ atndrte + frosh + soph) # Build matrix to replicate xtable.summary.lm output x <- summary(model1) colnames <- c("Estimate", "Std. 
Error", "t value", "Pr(<|t|)") rownames <- c("(Intercept)", attr(x$terms, "term.labels")) fpval <- pf(x$fstatistic[1],x$fstatistic[2], x$fstatistic[3], lower.tail=FALSE) mat1 <- matrix(coef(x), nrow=length(rownames), ncol=length(colnames), dimnames=list(rownames,colnames)) # Make a data frame for extra information to be called by \Sexpr in last row of table residse <- x$sigma degf <- x$df[2] multr2 <- x$r.squared adjr2 <- x$adj.r.squared fstat <- x$fstatistic[1] fstatdf1 <- x$fstatistic[2] fstatdf2 <- x$fstatistic[3] extradat <- data.frame(v1 = round(residse,4), v2 =degf, v3=round(multr2,4), v4=round(adjr2,4),v5=round(fstat,3), v6=fstatdf1, v7=fstatdf2, v8=round(fpval,6)) addtorow<- list() addtorow$pos <-list() addtorow$pos[[1]] <- dim(mat1)[1] addtorow$command <-c('\\hline \\multicolumn{5}{l}{Residual standard error:\\Sexpr{extradat$v1}} \\\\ ') print(xtable(mat1, caption="Summary Results for Regression in Equation \\eqref{model1} ", label="tab:model1"), add.to.row=addtorow, sanitize.text.function=NULL, caption.placement="top")
You don't need to have Sexpr in your R code; the R code can use the expressions directly. Sexpr is not a LaTeX command, even though it looks like one; it's an Sweave command, so it doesn't work to have it as output from R code. Try addtorow$command <-paste('\\hline \\multicolumn{5}{l}{Residual standard error:', extradat$v1, '} \\\\ ') Also, no need to completely recreate the matrix used by xtable, you can just build on the default output. Building on what you have above, something like: mytab <- xtable(model1, caption="Summary Results", label="tab:model1") addtorow$pos[[1]] <- dim(mytab)[1] print(mytab, add.to.row=addtorow, sanitize.text.function=NULL, caption.placement="top") See http://people.su.se/~lundh/reproduce/sweaveintro.pdf for an example which you might be able to use as is.