JSON newbie here. Could you please help with parsing JSON files using R. I did try jsonlite & rjson, but keep getting errors.
Below is the data retrieved via the api.
# NOTE(review): the query string looks mangled -- "SECURITY-APPNAME=GLOBAL-ID=EBAY-US"
# runs two parameters together; SECURITY-APPNAME should carry your eBay application
# ID, with GLOBAL-ID as a separate parameter (compare the corrected URL further down).
data <- GET("http://svcs.ebay.com/services/search/FindingService/v1?OPERATION-NAME=findItemsByKeywords&SERVICE-VERSION=1.0.0&SECURITY-APPNAME=GLOBAL-ID=EBAY-US&RESPONSE-DATA-FORMAT=JSON&callback=_cb_findItemsByKeywords&REST-PAYLOAD&keywords=harry%20potter&paginationInput.entriesPerPage=10")
The JSON looks like this:
/**/_cb_findItemsByKeywords({
"findItemsByKeywordsResponse":[
{
"ack":[
"Success"
],
"version":[
"1.13.0"
],
"timestamp":[
"2016-01-29T16:36:25.984Z"
],
"searchResult":[
{
"#count":"1",
"item":[
{
"itemId":[
"371533364795"
],
"title":[
"Harry Potter: Complete 8-Film Collection (DVD, 2011, 8-Disc Set)"
],
"globalId":[
"EBAY-US"
],
"primaryCategory":[
{
"categoryId":[
"617"
],
"categoryName":[
"DVDs & Blu-ray Discs"
]
}
],
"galleryURL":[
"http:\/\/thumbs4.ebaystatic.com\/m\/mn5Agt0HFD89L7_-lqfrZZw\/140.jpg"
],
"viewItemURL":[
"http:\/\/www.ebay.com\/itm\/Harry-Potter-Complete-8-Film-Collection-DVD-2011-8-Disc-Set-\/371533364795"
],
"productId":[
{
"#type":"ReferenceID",
"__value__":"110258144"
}
],
"paymentMethod":[
"PayPal"
],
"autoPay":[
"false"
],
"postalCode":[
"60131"
],
"location":[
"Franklin Park,IL,USA"
],
"country":[
"US"
],
"shippingInfo":[
{
"shippingServiceCost":[
{
"#currencyId":"USD",
"__value__":"0.0"
}
],
"shippingType":[
"FlatDomesticCalculatedInternational"
],
"shipToLocations":[
"US",
"CA",
"GB",
"AU",
"AT",
"BE",
"FR",
"DE",
"IT",
"JP",
"ES",
"TW",
"NL",
"CN",
"HK",
"MX",
"DK",
"RO",
"SK",
"BG",
"CZ",
"FI",
"HU",
"LV",
"LT",
"MT",
"EE",
"GR",
"PT",
"CY",
"SI",
"SE",
"KR",
"ID",
"ZA",
"TH",
"IE",
"PL",
"RU",
"IL"
],
"expeditedShipping":[
"false"
],
"oneDayShippingAvailable":[
"false"
],
"handlingTime":[
"1"
]
}
],
"sellingStatus":[
{
"currentPrice":[
{
"#currencyId":"USD",
"__value__":"26.95"
}
],
"convertedCurrentPrice":[
{
"#currencyId":"USD",
"__value__":"26.95"
}
],
"sellingState":[
"Active"
],
"timeLeft":[
"P16DT3H12M6S"
]
}
],
"listingInfo":[
{
"bestOfferEnabled":[
"false"
],
"buyItNowAvailable":[
"false"
],
"startTime":[
"2016-01-15T19:43:31.000Z"
],
"endTime":[
"2016-02-14T19:48:31.000Z"
],
"listingType":[
"StoreInventory"
],
"gift":[
"false"
]
}
],
"returnsAccepted":[
"true"
],
"condition":[
{
"conditionId":[
"1000"
],
"conditionDisplayName":[
"Brand New"
]
}
],
"isMultiVariationListing":[
"false"
],
"topRatedListing":[
"true"
]
}
]
}
],
"paginationOutput":[
{
"pageNumber":[
"1"
],
"entriesPerPage":[
"1"
],
"totalPages":[
"138112"
],
"totalEntries":[
"138112"
]
}
],
"itemSearchURL":[
"http:\/\/www.ebay.com\/sch\/i.html?_nkw=harry+potter&_ddo=1&_ipg=1&_pgn=1"
]
}
]
})
The problem is that your data is not JSON but JavaScript — JSONP, to be exact. If you just want to parse the JSON data, you have to strip off the padding callback function.
# Fetch the raw JSONP response from the eBay Finding API.
req <- httr::GET("http://svcs.ebay.com/services/search/FindingService/v1?OPERATION-NAME=findItemsByKeywords&SERVICE-VERSION=1.0.0&SECURITY-APPNAME=YOUR-APP-123456&GLOBAL-ID=EBAY-US&RESPONSE-DATA-FORMAT=JSON&callback=_cb_findItemsByKeywords&REST-PAYLOAD&keywords=harry%20potter&paginationInput.entriesPerPage=10")
body_txt <- content(req, "text")
# Strip the JSONP padding: first the literal callback prefix, then the
# trailing closing parenthesis, leaving plain JSON.
payload <- sub("/**/_cb_findItemsByKeywords(", "", body_txt, fixed = TRUE)
payload <- sub(")$", "", payload)
mydata <- jsonlite::fromJSON(payload)
Extra credit: alternatively you could use an actual JavaScript engine to parse the JavaScript:
library(V8)
# Create a V8 JavaScript context to evaluate the JSONP response in.
ctx <- V8::v8()
# Declare a global variable to capture the callback's argument.
ctx$eval("var out;")
# Define the JSONP callback so that evaluating the response stores its
# payload into `out`.
ctx$eval("function _cb_findItemsByKeywords(x){out = x;}")
# Download and evaluate the JSONP response; the callback fires and fills `out`.
ctx$source("http://svcs.ebay.com/services/search/FindingService/v1?OPERATION-NAME=findItemsByKeywords&SERVICE-VERSION=1.0.0&SECURITY-APPNAME=YOUR-APP-123456&GLOBAL-ID=EBAY-US&RESPONSE-DATA-FORMAT=JSON&callback=_cb_findItemsByKeywords&REST-PAYLOAD&keywords=harry%20potter&paginationInput.entriesPerPage=10")
# Pull the parsed data structure back into R.
mydata <- ctx$get("out")
First, your JSON file seems to have a small issue: it should have started at the opening bracket "[".
I removed the text before it and I've tried this code, which worked perfectly:
library(rjson)
# Parse the cleaned file (with the text before "[" removed) directly from disk.
obj <- fromJSON(file = "v2.json")
That returned a list in obj with the contents of v2.json.
EDITED: Including a fully functional solution:
library(rjson)
library(stringr)

# Read the raw file one line per row (quote = "" so quotes in the JSON
# are kept verbatim).
obj <- read.table("v2.json", sep = "\n", stringsAsFactors = FALSE, quote = "")
# Gets the first line with the string "[" ("\\" to escape it in the regex)
firstline <- grep("\\[", obj[, 1])[1]
# Position of the FIRST "[" in that line. which() can return several
# matches; the original kept them all, which breaks the single-cell
# str_sub() assignment below, so take only the first.
fpos <- which(strsplit(obj[firstline, 1], "")[[1]] == "[")[1]
# Gets the last line with the string "]"
lastline <- grep("\\]", obj[, 1])
lastline <- lastline[length(lastline)]
# Position of the LAST "]" in that line (same multiple-match fix).
lpos_all <- which(strsplit(obj[lastline, 1], "")[[1]] == "]")
lpos <- lpos_all[length(lpos_all)]
# Trim the first and last kept lines so only the text from the opening
# "[" through the closing "]" remains.
obj[firstline, 1] <- str_sub(obj[firstline, 1], fpos)
obj[lastline, 1] <- str_sub(obj[lastline, 1], 1, lpos)
obj2 <- data.frame(obj[firstline:lastline, 1])
# Write the cleaned lines out and parse the result as JSON.
write.table(obj2, "v3.json", row.names = FALSE, col.names = FALSE, quote = FALSE)
obj3 <- fromJSON(file = "v3.json")
Related
I would like to do the following using terraform:
I have 2 JSONs:
1.json:
[
{
"description": "description1",
"url": "url1",
"data": "data1"
},
{
"description": "description2",
"url": "url2",
"data": "data2",
"action": "action2"
},
{
"description": "description3",
"url": "url3",
"data": "data3"
}
]
2.json:
[
{
"description": "description1",
"url": "url1",
"data": "data1"
},
{
"description": "description2_new",
"url": "url2",
"data": "data2_new"
},
{
"description": "description4",
"url": "url4",
"data": "data4"
}
]
and I want to merge them into one. Dictionaries from the second JSON should override dictionaries from the first one if url key is the same. I.e. combined JSON should look like:
[
{
"description": "description1",
"url": "url1",
"data": "data1"
},
{
"description": "description2_new",
"url": "url2",
"data": "data2_new"
},
{
"description": "description3",
"url": "url3",
"data": "data3"
},
{
"description": "description4",
"url": "url4",
"data": "data4"
}
]
Using python I can easily do it:
import json

# Load both JSON documents from disk.
with open('1.json') as fh:
    json1 = json.load(fh)
with open('2.json') as fh:
    json2 = json.load(fh)
def list_to_dict(json_list):
    """Index a list of dicts by their 'url' key.

    Args:
        json_list: iterable of dicts, each containing a 'url' key.

    Returns:
        dict mapping each 'url' value to its dict. When several entries
        share a 'url', the last one wins (same as the original loop).
    """
    return {d['url']: d for d in json_list}
def merge_json(json1, json2):
    """Merge two lists of dicts keyed on 'url'.

    Entries from json2 replace entries from json1 that share the same
    'url'. Order is preserved by dict insertion semantics: json1's
    entries first (updated in place), then json2-only entries appended.

    Args:
        json1: base list of dicts, each with a 'url' key.
        json2: overriding list of dicts, each with a 'url' key.

    Returns:
        list of merged dicts.
    """
    merged = {d['url']: d for d in json1}
    merged.update({d['url']: d for d in json2})
    return list(merged.values())
# Pretty-print the merged list as indented JSON.
print(json.dumps(merge_json(json1, json2), indent=4))
How can I do that using terraform?
Using terraform 0.12.x
$ cat main.tf
locals {
  # Decode each JSON document into a list of objects.
  docs_a = jsondecode(file("1.json"))
  docs_b = jsondecode(file("2.json"))

  # Re-key each list by its "url" attribute so the two collections can
  # be merged with url as the unique key.
  by_url_a = { for o in local.docs_a : o.url => o }
  by_url_b = { for o in local.docs_b : o.url => o }

  # merge() gives precedence to the later map on duplicate keys, and
  # values() drops the keys again, leaving a plain list of objects.
  merged = values(merge(local.by_url_a, local.by_url_b))
}
# Expose the merged list so `terraform apply` prints it.
output "this" {
  value = local.merged
}
$ terraform apply
Apply complete! Resources: 0 added, 0 changed, 0 destroyed.
Outputs:
this = [
{
"data" = "data1"
"description" = "description1"
"url" = "url1"
},
{
"data" = "data2_new"
"description" = "description2_new"
"url" = "url2"
},
{
"data" = "data3"
"description" = "description3"
"url" = "url3"
},
{
"data" = "data4"
"description" = "description4"
"url" = "url4"
},
]
Terraform supports expanding a list into function parameters using the ... operator. This will allow an arbitrary number of documents to be read.
(The `...` expanding-arguments feature has been available since Terraform 0.12.)
For this example, I added a new file 3.json with the contents:
[
{
"description": "description4_new",
"url": "url4",
"data": "data4_new"
}
]
For main.tf, I'm using the same logic as @someguyonacomputer's answer:
$ cat main.tf
locals {
  # Decode every *.json document in the module directory.
  # fileset() returns names relative to its first argument, while
  # file() resolves relative paths against the current working
  # directory -- join with path.module so this works regardless of
  # where terraform is invoked from.
  jsondocs = [
    for filename in fileset(path.module, "*.json") : jsondecode(file("${path.module}/${filename}"))
  ]
  # Re-key each decoded list by its "url" attribute.
  as_dicts = [
    for arr in local.jsondocs : {
      for obj in arr : obj.url => obj
    }
  ]
  # This is where the '...' operator is used: it expands the list of
  # maps into separate merge() arguments; later documents win on
  # duplicate urls.
  merged = merge(local.as_dicts...)
}
# Drop the url keys again and output the merged objects as a list.
output "as_list" {
  value = values(local.merged)
}
Result:
Changes to Outputs:
+ as_list = [
+ {
+ data = "data1"
+ description = "description1"
+ url = "url1"
},
+ {
+ data = "data2_new"
+ description = "description2_new"
+ url = "url2"
},
+ {
+ data = "data3"
+ description = "description3"
+ url = "url3"
},
+ {
+ data = "data4_new"
+ description = "description4_new"
+ url = "url4"
},
]
References:
Terraform Docs -- Function Calls # Expanding Function Arguments
I want to merge several lists into one JSON array.
These are my two lists:
# Two parallel input lists; element i of each belongs to the same record.
address = ['address1','address2']
temp = ['temp1','temp2']
I combine both lists by the following call and create a JSON .
# zip() pairs the lists and map(list, ...) turns each pair into a
# two-element list -- which is why the response contains nested arrays
# instead of objects keyed by field name.
new_list = list(map(list, zip(address, temp)))
# NOTE(review): jsonify is presumably Flask's -- confirm against the
# surrounding app code.
jsonify({
'data': new_list
})
This is my result for the call:
{
"data": [
[
"address1",
"temp1"
],
[
"address2",
"temp2"
]
]
}
However, I would like to receive the following output instead. How do I do that, and how can I insert the identifiers address and temp?
{
"data": [
{
"address": "address1",
"temp": "temp1"
},
{
"address": "address2",
"temp": "temp2"
}
]
}
You can use a list-comprehension:
import json

address = ['address1','address2']
temp = ['temp1','temp2']

# Walk the two lists in lockstep and label each pair with its field name.
records = [dict(address=a, temp=t) for a, t in zip(address, temp)]
d = {'data': records}

print(json.dumps(d, indent=4))
Prints:
{
"data": [
{
"address": "address1",
"temp": "temp1"
},
{
"address": "address2",
"temp": "temp2"
}
]
}
You can just change your existing code like this. That lambda function will do the trick of converting it into a dict.
address = ['address1','address2']
temp = ['temp1','temp2']

# Same idea as the lambda version, written as a comprehension: each
# (address, temp) pair becomes a labelled dict.
new_list = [{'address': pair[0], 'temp': pair[1]} for pair in zip(address, temp)]
jsonify({
    'data': new_list
})
I have JSON like below, which I got from the URL below:
{
"info" : {
"1484121600" : [
212953175.053333,212953175.053333,null
],
"1484125200" : [
236203014.133333,236203014.133333,236203014.133333
],
"1484128800" : [
211414832.968889,null,211414832.968889
],
"1484132400" : [
208604573.791111,208604573.791111,208604573.791111
],
"1484136000" : [
231358374.288889,231358374.288889,231358374.288889
],
"1484139600" : [
210529301.097778,210529301.097778,210529301.097778
],
"1484143200" : [
212009682.04,null,212009682.04
],
"1484146800" : [
232364759.566667,232364759.566667,232364759.566667
],
"1484150400" : [
218138788.524444,218138788.524444,218138788.524444
],
"1484154000" : [
218883301.282222,218883301.282222,null
],
"1484157600" : [
237874583.771111,237874583.771111,237874583.771111
],
"1484161200" : [
216227081.924444,null,216227081.924444
],
"1484164800" : [
227102054.082222,227102054.082222,null
]
},
"summary" : "data",
"end" : 1484164800,
"start": 1484121600
}
I'm fetching this json from some url using jsonlite package in R like below:
library(jsonlite)
# `url` must hold the endpoint that returns the JSON shown above.
input_data <- fromJSON(url)
timeseries <- input_data[['info']] # till here code is fine
# NOTE(review): timeseries[[1]] extracts only the FIRST element (a bare
# numeric vector, which has no names), so names(timeseries[[1]]) is NULL
# and unlist() flattens just that one array -- the epoch keys actually
# live in names(timeseries) itself. This is the failing line.
abc <- data.frame(ds = names(timeseries[[1]]),
y = unlist(timeseries[[1]]), stringsAsFactors = FALSE)
(something is wrong in above line)
I need to convert the data in the timeseries variable into a data frame, which will have an index column holding the epoch time; the number of columns in the data frame will depend on the number of values in each array, and all arrays are guaranteed to have the same number of values. However, that number is not fixed — it can be 1, 2, or more. In the example below the array size is 3 for all.
for eg : dataframe should look like:
index y1 y2 y3
1484121600 212953175.053333 212953175.053333 null
1484125200 236203014.133333 236203014.133333 236203014.133333
Please suggest how do I do this in R. I'm new to it.
JSON with only 1 item in array:
{
"info": {
"1484121600": [
212953175.053333
],
"1484125200": [
236203014.133333
],
"1484128800": [
211414832.968889
],
"1484132400": [
208604573.791111
],
"1484136000": [
231358374.288889
],
"1484139600": [
210529301.097778
],
"1484143200": [
212009682.04
],
"1484146800": [
232364759.566667
],
"1484150400": [
218138788.524444
],
"1484154000": [
218883301.282222
],
"1484157600": [
237874583.771111
],
"1484161200": [
216227081.924444
],
"1484164800": [
227102054.082222
]
},
"summary": "data",
"end": 1484164800,
"start": 1484121600
}
Consider binding the list of json values to a matrix with sapply(), then transpose columns to rows with t(), and finally convert to dataframe with data.frame()
# sapply() simplifies the list of equal-length numeric vectors into a
# matrix (one column per timestamp; JSON nulls arrive as NA), t() flips
# the timestamps into rows, and data.frame() keeps them as row names.
abc <- data.frame(t(sapply(timeseries, c)))
# data.frame() auto-names the columns X1..Xn; rename them to y1..yn.
colnames(abc) <- gsub("X", "y", colnames(abc))
abc
# y1 y2 y3
# 1484121600 212953175 212953175 NA
# 1484125200 236203014 236203014 236203014
# 1484128800 211414833 NA 211414833
# 1484132400 208604574 208604574 208604574
# 1484136000 231358374 231358374 231358374
# 1484139600 210529301 210529301 210529301
# 1484143200 212009682 NA 212009682
# 1484146800 232364760 232364760 232364760
# 1484150400 218138789 218138789 218138789
# 1484154000 218883301 218883301 NA
# 1484157600 237874584 237874584 237874584
# 1484161200 216227082 NA 216227082
# 1484164800 227102054 227102054 NA
I'm using rCharts Leaflet maps to display polygons on map on R.
Using the Leaflet's geoJson I created some polygons and added them to the map. However, those polygons are filled with a default blue color. I'm trying to give them a different color, but no success.
For example, I used the following JSON and tested it in geojson.io, where it came up green; however, the R package still plots it in blue. How can I enforce the color?
JSON:
{
"type": "FeatureCollection",
"features": [
{
"type": "Feature",
"properties": {
"stroke": "#555555",
"stroke-width": 2,
"stroke-opacity": 1,
"fill": "#00f900",
"fill-opacity": 0.5
},
"geometry": {
"type": "Polygon",
"coordinates": [
[
[
-74.06982421875,
40.64730356252251
],
[
-74.06982421875,
40.79717741518769
],
[
-73.80615234375,
40.79717741518769
],
[
-73.80615234375,
40.64730356252251
],
[
-74.06982421875,
40.64730356252251
]
]
]
}
}
]
}
R:
# Assign the GeoJSON string shown above (placeholder: substitute the
# actual string). Name fixed: the original assigned `jsonx` but then
# read `jsonX` -- R names are case-sensitive, so that line would fail.
jsonX <- (JSON above)
# Parse the GeoJSON text into an R list structure.
polys <- RJSONIO::fromJSON(jsonX)
# Center the map roughly on the continental US.
map.center <- c(38, -95)
myMap <- Leaflet$new()
myMap$setView(map.center, 4)
myMap$tileLayer(provider = "Esri.WorldGrayCanvas")
# Add the polygons; they come up with the default (blue) fill, which is
# the issue being asked about.
myMap$geoJson(polys)
myMap$set(dom = 'myChart2')
myMap
While the rCharts implementation was nice, RStudio's leaflet package based on htmlwidgets is much more full-featured and robust. If you can use it instead, here is an answer. Note, nothing needs to be done. leaflet will pick up the fill in your geoJSON.
# uncomment to install the most recent from github
# devtools::install_github("rstudio/leaflet")
# or older cran #install.packages("leaflet")
library(leaflet)
# GeoJSON FeatureCollection: a single rectangular polygon over the New
# York area, carrying simplestyle properties ("stroke", "fill", etc.)
# that leaflet's addGeoJSON() reads for styling.
gj <- '
{
"type": "FeatureCollection",
"features": [
{
"type": "Feature",
"properties": {
"stroke": "#555555",
"stroke-width": 2,
"stroke-opacity": 1,
"fill": "#00f900",
"fill-opacity": 0.5
},
"geometry": {
"type": "Polygon",
"coordinates": [
[
[
-74.06982421875,
40.64730356252251
],
[
-74.06982421875,
40.79717741518769
],
[
-73.80615234375,
40.79717741518769
],
[
-73.80615234375,
40.64730356252251
],
[
-74.06982421875,
40.64730356252251
]
]
]
}
}
]
}
'
# Nothing extra needed: addGeoJSON() honours the "fill" property from
# the GeoJSON itself, so the polygon renders green.
leaflet() %>%
addTiles() %>%
setView( -74.1, 40.7, zoom = 10) %>%
addGeoJSON( gj )
# To show the fill argument works: strip the "fill" property from the
# GeoJSON with gsub(), then supply the colour through addGeoJSON()'s
# fillColor argument instead.
leaflet() %>%
addTiles() %>%
setView( -74.1, 40.7, zoom = 10) %>%
addGeoJSON(
gsub(
x = gj
,pattern = '(\\"fill\": \\"#00f900\\",)'
,replacement = ""
)
# demo addGeoJSON fillColor argument
,fillColor = 'green'
)
I try reading a JSON file from R using rjson but keep getting errors. I validated the JSON file using various online validators. Here is the content of the JSON file:
{
"scenarios": [
{
"files": {
"type1": "/home/blah/Desktop/temp/scen_0.type1",
"type2": "/home/blah/Desktop/temp/scen_0.type2"
},
"ID": "scen_0",
"arr": [],
"TypeToElementStatsFilename": {
"type1": "/home/blah/Desktop/temp/scen_0.type1.elements",
"type2": "/home/blah/Desktop/temp/scen_0.type2.elements"
}
}
],
"randomSeed": "39327314969888",
"zone": {
"length": 1000000,
"start": 1
},
"instanceFilename": "/home/blah/bloo/data/XY112.zip",
"txtFilename": "/home/blah/bloo/data/XY112.txt",
"nSimulations": 2,
"TypeTodbFilename": {
"type1": "/home/blah/bloo/data/map.type1.oneAmb.XY112.out"
},
"arr": {
"seg11": {
"length": 1000,
"start": 147000
},
"seg12": {
"length": 1000,
"start": 153000
},
"seg5": {
"length": 1000,
"start": 145000
},
"seg6": {
"length": 1000,
"start": 146000
},
"seg1": {
"length": 100,
"start": 20000
}
},
"outPath": "/home/blah/Desktop/temp",
"instanceID": "XY112",
"arrIds": [
"seg5",
"seg6",
"seg1",
"seg11",
"seg12"
],
"truth": {
"files": {
"type1": "/home/blah/Desktop/temp/truth.type1",
"type2": "/home/blah/Desktop/temp/truth.type2"
},
"ID": "truth",
"TypeToElementStatsFilename": {
"type1": "/home/blah/Desktop/temp/truth.type1.elements",
"type2": "/home/blah/Desktop/temp/truth.type2.elements"
}
}
}
And the error:
> json_file <- "~/json"
> json_data <- fromJSON(paste(readLines(json_file), collapse=""))
Error in fromJSON(paste(readLines(json_file), collapse = "")) :
unexpected character: :
RJSON freaks out about empty arrays.
fromJSON( '{ "arr": [ ] }')
Error in fromJSON("{ \"arr\": [ ] }") : unexpected character: :
You can try the fromJSON function in the RJSONIO package hosted at http://www.omegahat.org. It seems to read the file fine.
There's a fix for this.
Create a new function to replace the existing getURL function used in RCurl and you should have your solution.
# Drop-in replacement for RCurl::getURL() that temporarily disables
# RCurl's internal unicode-escape translation, so sequences like
# "\u0030" come back verbatim instead of being decoded. It works by
# rebinding RCurl's private mapUnicodeEscapes() to the identity
# function for the duration of a single call.
myGetURL <- function(...) {
# Grab RCurl's namespace and save the original binding for restoration.
rcurlEnv <- getNamespace("RCurl")
mapUnicodeEscapes <- get("mapUnicodeEscapes", rcurlEnv)
# Namespace bindings are locked; unlock before reassigning.
unlockBinding("mapUnicodeEscapes", rcurlEnv)
assign("mapUnicodeEscapes", function(str) str, rcurlEnv)
# Restore the original function and re-lock the binding when this call
# exits, even on error.
on.exit({
assign("mapUnicodeEscapes", mapUnicodeEscapes, rcurlEnv)
lockBinding("mapUnicodeEscapes", rcurlEnv)
}, add = TRUE)
# Forward all arguments to the real getURL().
return(getURL(...))
}
Test:
> json <- myGetURL("http://abicky.net/hatena/rcurl/a.json")
> cat(json, fill = TRUE)
{"a":"\\\"\u0030\\\""}
> fromJSON(json)
$a
[1] "\\\"0\\\""