Nested JSON data to CSV - json

I am trying to convert JSON from a URL to CSV/data.frame in R. The data is at: Data_Kayak and pasted below.
So far, I have tried newJSONParser() and fromJSON() from library(rjson). The closest I have come is generating a list. I need the data in CSV format so I can analyze and plot it.
{"origin":{"latitude":37.25,"name":"Springfield, MO","shortName":"SGF","longitude":-93.38333333},"destinations":[{"country":{"latitude":40,"name":"United States","shortName":"US","longitude":-96.5},"flight":359,"temp":[[18,43],[21,46],[28,54],[36,61],[45,72],[54,82],[59,88],[57,86],[48,77],[37,64],[25,52],[18,45]],"flightInfo":{"price":359},"languages":["en"],"city":{"latitude":39.73917,"name":"Denver","id":"12493","longitude":-104.98417},"shortDateRange":"2/28 - 3/8","airport":{"latitude":39.86310781,"popularity":40614,"name":"Denver, CO","shortName":"DEN","longitude":-104.6748161},"flightMaxDuration":135,"activities":["ski","gambling","golf"],"days":8,"region":{"latitude":39.149496257664,"name":"Colorado","shortName":"CO","longitude":-104.94804819074001},"depart":"20150228","airline":"United","return":"20150308","flightMaxStops":0},{"country":{"latitude":40,"name":"United States","shortName":"US","longitude":-96.5},"flight":279,"temp":[[16,32],[19,36],[28,46],[39,59],[48,70],[57,79],[63,84],[63,82],[54,75],[43,63],[32,48],[21,36]],"flightInfo":{"price":279},"languages":["en"],"city":{"latitude":41.88451,"name":"Chicago","id":"12514","longitude":-87.63095},"shortDateRange":"2/19 - 2/28","airport":{"latitude":41.98333333,"popularity":32415,"name":"Chicago, IL","shortName":"ORD","longitude":-87.9},"flightMaxDuration":90,"activities":["ski","gambling","golf"],"days":9,"region":{"latitude":40.268763621091,"name":"Illinois","shortName":"IL","longitude":-89.062810958234},"depart":"20150219","airline":"American Airlines","return":"20150228","flightMaxStops":0},{"country":{"latitude":40,"name":"United States","shortName":"US","longitude":-96.5},"flight":379,"temp":[[34,52],[37,57],[45,66],[52,73],[61,81],[68,86],[72,90],[70,88],[64,82],[54,73],[43,64],[36,55]],"flightInfo":{"price":379},"languages":["en"],"city":{"latitude":33.74889,"name":"Atlanta","id":"11123","longitude":-84.38806},"shortDateRange":"2/27 - 
3/1","airport":{"latitude":33.65,"popularity":30080,"name":"Atlanta, GA","shortName":"ATL","longitude":-84.43333333},"flightMaxDuration":120,"activities":["golf"],"days":2,"region":{"latitude":32.929258844272,"name":"Georgia","shortName":"GA","longitude":-83.621524741888},"depart":"20150227","airline":"Delta","return":"20150301","flightMaxStops":0},{"country":{"latitude":40,"name":"United States","shortName":"US","longitude":-96.5},"flight":279,"temp":[[36,55],[39,61],[48,68],[55,77],[64,84],[72,91],[75,97],[75,95],[68,88],[57,79],[46,66],[37,57]],"flightInfo":{"price":279},"languages":["en"],"city":{"latitude":32.78333,"name":"Dallas","id":"16406","longitude":-96.8},"shortDateRange":"2/25 - 3/3","airport":{"latitude":32.89710131,"popularity":24500,"name":"Dallas, TX","shortName":"DFW","longitude":-97.04386711},"flightMaxDuration":90,"activities":["golf"],"days":6,"region":{"latitude":31.344670987137,"name":"Texas","shortName":"TX","longitude":-97.666193354857},"depart":"20150225","airline":"American Airlines","return":"20150303","flightMaxStops":0}]}

jsonlite did the trick
# Download the Kayak "explore" JSON for airport SGF and convert it to a
# data frame.
library(jsonlite)
# Load RCurl from the default library search path; a hard-coded lib.loc only
# works on the machine where that directory exists.
library(RCurl)
# Kept from the original; nothing in this snippet reads or writes a relative
# path.
setwd("~/Kayak R")
url <- "http://www.kayak.com/h/explore/api?airport=SGF&v=1&flightMaxStops=0"
# Fetch the raw JSON text from the endpoint.
kkjson <- getURLContent(url)
# Parse the JSON text into R objects.
kk_txt_from_json <- fromJSON(kkjson)
# Combine the parsed result into a single data frame.
kk_table <- data.frame(kk_txt_from_json)

Related

Edit multiple JSON files in R and saving them to CSV (Alternative for "for" loop)

I have multiple JSON files containing Tweets from Twitter. I want to import and edit them in R one by one.
For a single file my code looks like this:
# Read one JSON file of tweets.
data <- fromJSON("filename.json")
# Keep only columns 1-3, 13 and 14.
data <- data[c(1:3,13,14)]
# Replace every lang value other than "de" with NA ...
data$lang <- ifelse(data$lang!="de",NA,data$lang)
# ... then drop every row that contains an NA.
data <- na.omit(data)
# Write the filtered rows to a CSV file.
write_as_csv(data,"filename.csv")
Now I want to apply this code to multiple files. I found a "for" loop code here:
Loop in R to read many files
Applied to my problem it should look something like this:
# Attempt to apply the single-file steps to every JSON file in a directory.
setwd("~/Documents/Elections")
# NOTE(review): ldf is created here but never used below.
ldf <- list()
# Collect the file names in the working directory that match the pattern.
listjson <- dir(pattern = "*.json")
for (k in 1:length(listjson)){
# NOTE(review): this assigns into position k of a `data` object that is not
# created in this snippet, rather than replacing `data` as a whole.
data[k] <- fromJSON(listjson[k])
data[k] <- data[k][c(1:3,13,14)]
data[k]$lang <- ifelse(data[k]$lang!="de",NA,data[k]$lang)
data[k] <- na.omit(data[k])
# paste() joins its arguments with a space by default.
filename <- paste(k, ".csv")
# NOTE(review): the first argument is the file name listjson[k], not the
# processed data.
write_as_csv(listjson[k],filename)
}
But the first line in the loop already doesn't work.
> data[k] <- fromJSON(listjson[k])
Warning message:
In `[<-.data.frame`(`*tmp*`, k, value = list(createdAt = c(1505935036000, :
provided 35 variables to replace 1 variables
I can't figure out why. Also, I wonder if there is a nicer way to solve this problem without using a for loop. I read about the apply family, but I don't know how to apply it to my problem. Thanks in advance!
This is an example how my data looks:
https://drive.google.com/file/d/19cRS6p_mHbO6XXprfvc6NPZWuf_zG7jr/view?usp=sharing
It should work like this:
# Filter every JSON file in the directory and write one CSV per input file.
setwd("~/Documents/Elections")
# dir() expects a regular expression, not a shell glob: match names that end
# in ".json".
listjson <- dir(pattern = "\\.json$")
# seq_along() gives an empty sequence when no files match, whereas
# 1:length(listjson) would iterate over c(1, 0).
for (k in seq_along(listjson)) {
  # Load the JSON file that corresponds to the k-th element of the file list
  data <- fromJSON(listjson[k])
  # Select the relevant columns from the data frame
  data <- data[, c(1:3, 13, 14)]
  # Set lang to NA unless it is "de", then drop every row containing an NA
  data$lang <- ifelse(data$lang != "de", NA, data$lang)
  data <- na.omit(data)
  # Swap the ".json" extension for ".csv"; paste0() avoids the space that
  # paste() would insert before ".csv".
  filename <- paste0(sub("\\.json$", "", listjson[k]), ".csv")
  write_as_csv(data, filename)
}
For the second part of the question: apply() applies a function over the rows or columns of a matrix or data frame. That is not your case, because you are iterating over a character vector of file names that are then used elsewhere, so a for loop (or lapply() over the file names) is the appropriate tool.

SDMX to dataframe with RSDMX in R

I'm trying to get data from the Lithuanian Statistics Department. They offer SDMX API with either XML or JSON (LSD).
The example XML shown is : https://osp-rs.stat.gov.lt/rest_xml/data/S3R629_M3010217 which downloads the XML file.
I tried following:
# Install rsdmx from its GitHub repository.
devtools::install_github("opensdmx/rsdmx")
library(rsdmx)
# Address of the XML endpoint for dataset S3R629_M3010217.
string <- "https://osp-rs.stat.gov.lt/rest_xml/data/S3R629_M3010217"
# Try to read the SDMX document directly from that address.
medianage <- readSDMX(string)
which results in error:
<simpleError in doTryCatch(return(expr), name, parentenv, handler): Invalid SDMX-ML file>
I also tried simply reading in the manually downloaded file
# Install rsdmx from its GitHub repository.
devtools::install_github("opensdmx/rsdmx")
library(rsdmx)
# Read the manually downloaded XML file from disk instead of from a URL.
medianage <- readSDMX(file="rest_data_M3010217_20180116163251.xml" , isURL = FALSE)
# Convert the parsed SDMX object to a data frame.
medianage <- as.data.frame(medianage)
results in medianage being NULL (empty)
Maybe someone has an idea how I could download and transform the data from LSD using either:
https://osp-rs.stat.gov.lt/rest_xml/data/S3R629_M3010217
https://osp-rs.stat.gov.lt/rest_json/data/S3R629_M3010217
Thanks a lot!
In order to use rsdmx for this data source, some enhancements have been added (see details at https://github.com/opensdmx/rsdmx/issues/141). You will need to re-install rsdmx from GitHub (version 0.5-11).
You can use the url of the SDMX-ML file
library(rsdmx)
# Address of the SDMX-ML (XML) document for dataset S3R629_M3010217.
url <- "https://osp-rs.stat.gov.lt/rest_xml/data/S3R629_M3010217"
# Read the SDMX document from that address ...
medianage <- readSDMX(url)
# ... and convert the result to a data frame.
df <- as.data.frame(medianage)
A connector has been added in rsdmx to facilitate data query on the LSD (Lithuanian Statistics Department) SDMX endpoint. See below an example on how to use it.
# Query the LSD provider by id instead of by URL; dsd = TRUE is what makes
# labels = TRUE usable in the conversion below.
sdmx <- readSDMX(
  providerId = "LSD",
  resource = "data",
  flowRef = "S3R629_M3010217",
  dsd = TRUE
)
# Convert to a data frame, adding the code labels.
df <- as.data.frame(sdmx, labels = TRUE)
The above example shows how to enrich the data.frame with code labels extracted from the SDMX Data Structure Definition (DSD). To do this, specify dsd = TRUE in readSDMX; this then allows you to use labels = TRUE when converting to a data.frame. For filtering data with readSDMX (e.g. startPeriod, endPeriod, or code filters), see https://github.com/opensdmx/rsdmx/wiki#readsdmx-as-helper-function

JSON to data frame in R

I am trying to turn a .json file into a data frame for data visualization.
If I run the below code I get picture 1.
library(jsonlite)
# Parse the JSON file into R objects.
jdata <- fromJSON("test.json")
# Coerce the parsed result to a data frame.
data <- as.data.frame(jdata)
And when I try to transpose it, I get picture 2.
# Transpose the data frame (t() returns a matrix), then convert the result
# back to a data frame; the former column names become row names.
data2 <- as.data.frame(t(data))
This is how the json looks like raw:
I don't understand why column one has no name or is not part of the data frame (is jsonlite assuming these are titles?). How can I overcome this?
I need a data frame from the json files:
Column1 (with the dates) | Column2 (I will divide it into values and coordinates)
Try this for the input file test.json
library(jsonlite)
# Read the file with simplification switched on, so that jsonlite returns
# vectors/data frames where it can instead of nested lists (see the jsonlite
# documentation for the exact simplification rules).
jdata <- read_json("test.json", simplifyVector = TRUE)

how to make a list from json string

I have a JSON file "adjFloatTest.data". In R, I read the field "volume" from that file using the following code:
# Parse the JSON file into R objects.
json <- fromJSON("adjFloatTest.data")
# Extract the "volume" element of the parsed result.
volume <- json$volume
the value of the volume is as follows
> volume
$AAPL
$AAPL[[1]]
1980-12-12
16751200
$AAPL[[2]]
1980-12-15
100424081
$AAPL[[3]]
1980-12-16
0.1177374
$AAPL[[4]]
1980-12-17
7164476
$AAPL[[5]]
1980-12-18
5364366
Each element corresponds to a company, a date, and a value. I want to store all the dates in a list. How is this possible?
This will give you the list of dates
# Return the names found inside each element of volume.
sapply(volume,names)
the following should work:
# For each element of volume, take the first item of every entry it contains.
sapply(volume, function(x) lapply(x, "[[", 1))
but a reproducible example that could be copied+pasted would be helpful.
If the above doesn't work, please use something like dput(volume[1:2]) to offer some workable sample data.

Using \Sexpr{} in LaTeX tabular environment

I am trying to use \Sexpr{} to include values from my R objects in a LaTeX table. I am essentially trying to replicate the summary output of an lm object in R, because xtable's built-in methods xtable.lm and xtable.summary.lm don't seem to include the F-statistic, adjusted R-squared, etc. (all the stuff at the bottom of the summary printout of the lm object in the R console). So I tried accomplishing this by building a matrix to replicate the xtable.summary.lm output and then constructing a data frame of the relevant extra information, so that I can refer to the values using \Sexpr{}. I tried doing this by using add.to.row to append the \multicolumn{} command in order to merge all columns of the last row of the LaTeX table, and then passing all the information I need into that cell of the table.
The problem is that I get an "Undefined control sequence" error for the \Sexpr{} expression inside the \multicolumn{} expression. Are these two not compatible? If they are compatible, what am I doing wrong? If not, does anyone know how to do what I am trying to do?
Thanks,
Here is the relevant part of my code:
<<Test, results=tex>>=
# Fit the linear model whose summary table is reproduced below.
model1 <- lm(stndfnl ~ atndrte + frosh + soph)
# Build matrix to replicate xtable.summary.lm output
x <- summary(model1)
# NOTE(review): summary.lm labels the p-value column "Pr(>|t|)"; the "<" in
# the last label below looks like a typo -- confirm.
colnames <- c("Estimate", "Std. Error", "t value", "Pr(<|t|)")
rownames <- c("(Intercept)", attr(x$terms, "term.labels"))
# Upper-tail p-value computed from the three components of x$fstatistic.
fpval <- pf(x$fstatistic[1],x$fstatistic[2], x$fstatistic[3], lower.tail=FALSE)
# Coefficient matrix with one row per term and one column per label above.
mat1 <- matrix(coef(x), nrow=length(rownames), ncol=length(colnames), dimnames=list(rownames,colnames))
# Make a data frame for extra information to be called by \Sexpr in last row of table
residse <- x$sigma
degf <- x$df[2]
multr2 <- x$r.squared
adjr2 <- x$adj.r.squared
fstat <- x$fstatistic[1]
fstatdf1 <- x$fstatistic[2]
fstatdf2 <- x$fstatistic[3]
# One-row data frame holding the rounded summary statistics.
extradat <- data.frame(v1 = round(residse,4), v2 =degf, v3=round(multr2,4), v4=round(adjr2,4),v5=round(fstat,3), v6=fstatdf1, v7=fstatdf2, v8=round(fpval,6))
# add.to.row specification: one extra LaTeX row placed after the last row of
# mat1.
addtorow<- list()
addtorow$pos <-list()
addtorow$pos[[1]] <- dim(mat1)[1]
# The command is a plain string, so the \Sexpr{...} text inside it is not
# evaluated here; it is passed on as written.
addtorow$command <-c('\\hline \\multicolumn{5}{l}{Residual standard error:\\Sexpr{extradat$v1}} \\\\ ')
# Print the table with the extra row appended and the caption on top.
print(xtable(mat1, caption="Summary Results for Regression in Equation \\eqref{model1} ", label="tab:model1"), add.to.row=addtorow, sanitize.text.function=NULL, caption.placement="top")
You don't need to have Sexpr in your R code; the R code can use the expressions directly. Sexpr is not a LaTeX command, even though it looks like one; it's an Sweave command, so it doesn't work to have it as output from R code.
Try
# Build the extra table row in R itself: paste() drops the value of
# extradat$v1 into the LaTeX string, so no \Sexpr{} is needed.
addtorow$command <- paste(
  '\\hline \\multicolumn{5}{l}{Residual standard error:',
  extradat$v1,
  '} \\\\ '
)
Also, no need to completely recreate the matrix used by xtable, you can just build on the default output. Building on what you have above, something like:
# Let xtable build the coefficient table directly from the fitted model.
mytab <- xtable(
  model1,
  caption = "Summary Results",
  label = "tab:model1"
)
# Place the extra row after the last row of the table.
addtorow$pos[[1]] <- nrow(mytab)
print(
  mytab,
  add.to.row = addtorow,
  sanitize.text.function = NULL,
  caption.placement = "top"
)
See http://people.su.se/~lundh/reproduce/sweaveintro.pdf for an example which you might be able to use as is.