What's wrong with my jsoncpp parse program & json file? - json

the json file:
// config
{
"is_train" : false,
"train" : {
"train_data" : "data.txt",
"save_model_path" : "svm_model.yaml",
"SVM" : {
"term_crit" : {
"method" : 1,
"iter" : 1000,
"eps" : 1e-6
},
"type" : 100,
"kernel_type" : 0,
"Cvalue" : 0.1,
"degree" : 0,
"gamma" : 0,
"coef0" : 0,
"nu" : 0,
"p" : 0,
"class_weights" : 0,
}
},
"predict" : {
"SVM" : {
"model" : "save_model.yaml",
"test_data" : "test_data.txt",
"test_ans" : "test_out.txt"
}
}
}
The problem is that when I put "predict" before "train", the parameters in "predict" are parsed correctly
(value["predict"].isNull() returns false),
but the ones in "train" are not. The reverse happens when "train" comes first.
So how can I parse both correctly?

Related

How to scrape a JSON tag included deeply in a HTML Page

I'm trying to scrape Amazon's Goldbox page and extract the JSON object that holds the deal details (dealDetails).
I've tried extracting all the JSON within the 40th script tag, but I ended up with 15,000 lines of code.
The JSON within the page is like this:
<script type="text/javascript">(function(f) {var _np=(window.P._namespace("GoldboxMobileMason"));if(_np.guardFatal){_np.guardFatal(f)(_np);}else{f(_np);}}(function(P) {
window.gb = window.gb || {};
{
"GDS" : {
"baseRetryInterval" : 4000,
"maxRetries" : 0,
"ajaxTimeout" : 10000
}
},
{
"GD" : {
"baseRetryInterval" : 4000,
"maxRetries" : 1,
"ajaxTimeout" : 10000
}
},
{
"WD" : {
"baseRetryInterval" : 4000,
"maxRetries" : 0,
"ajaxTimeout" : 10000
}
}
"dealDetails" : {
"3b009cf9" : {
"egressUrl" : "https://www.amazon.com/Meredith-Martha-Stewart-Living/dp/B002PXW0EO",
"maxDealPrice" : "5.49",
"offerID" : 000
"maxPrevPrice" : "5.49",
"minBAmount" : "49.9",
"itemType" : "SINGLE_ITEM",
"minPercentOff" : 89,
"items" : [
]
},
"f87c994b" : {
"egressUrl" : "https://www.amazon.com/s/?url=search-
"reviewAsin" : "B073VYKTZN",
"maxListPrice" : "159.99",
"isMAP" : "0",
"displayPriority" : "0",
"isEligibleForFreeShipping" : "0",
"isPrimeEligible" : "1",
"dealID" : "f87c994b",
"description" : "Save 50% on JUVEA All Natural Talalay Latex Pillows",
"minBAmount" : "99.99",
"currencyCode" : "USD",
"minListPrice" : "129.99",
"merchantID" : "A21VHZ1TV3ZUZI",
"score" : "0",
"bKind" : "OP",
"msToFeatureEnd" : "0",
},
"responseMetadata" : {
"continueRetries" : "1",
"baseRetryInterval" : "12000"
}
};
window.gb.controller.registerWidget(widgetToRegister);
});
}));</script>
I tried using Regex but I think I'm doing it wrong:
page = requests.get(primary_url, auth=('user', 'pass'), headers=headers)
soup = BeautifulSoup(page.text, 'lxml')
data = soup.select("[type='text/javascript']")[40]
raw = "dealdetails" + "\n".join(str(data.find("script")).split("\n")[4:-3])
print(raw)
json_obj = json.loads(raw)
The end result must be:
"dealDetails" : {
"3b009cf9" : {
"egressUrl" : "https://www.amazon.com/Meredith-Martha-Stewart-Living/dp/B002PXW0EO",
"maxDealPrice" : "5.49",
"offerID" : 000
"maxPrevPrice" : "5.49",
"minBAmount" : "49.9",
"itemType" : "SINGLE_ITEM",
"minPercentOff" : 89,
"items" : [
]
},
"f87c994b" : {
"egressUrl" : "https://www.amazon.com/s/?url=search-
"reviewAsin" : "B073VYKTZN",
"maxListPrice" : "159.99",
"isMAP" : "0",
"displayPriority" : "0",
"isEligibleForFreeShipping" : "0",
"isPrimeEligible" : "1",
"dealID" : "f87c994b",
"description" : "Save 50% on JUVEA All Natural Talalay Latex Pillows",
"minBAmount" : "99.99",
"currencyCode" : "USD",
"minListPrice" : "129.99",
"merchantID" : "A21VHZ1TV3ZUZI",
"score" : "0",
"bKind" : "OP",
"msToFeatureEnd" : "0",
},
"responseMetadata" : {
"continueRetries" : "1",
"baseRetryInterval" : "12000"
}
};
My best guess is:
re.search(r'^{.*?^}', script_content, re.MULTILINE | re.DOTALL)[0]
but if the indenting is different you will need to adjust it.
fixed_str = [your json above, fixed into valid json format]
target = fixed_str.replace("dealDetails",'xxx{ "dealDetails').split("xxx") #this splits the script tag by first removing preceding irrelevant stuff
final = target[1].replace("}\n};","}}\n}xxx").split('xxx') #this splits it again by dropping trailing irrelevant stuff
json_obj = json.loads(final[0])
json_obj
And, if all works well :), it should get you your desired end result...

Updating a CZML property after the property is declared

I'm trying to update some properties (namely "semiMinorAxis" and "semiMajorAxis") of a packet previously declared in a CZML file.
The way I attempt to do this is by overwriting the values of "semiMinorAxis" and "semiMajorAxis".
That is, between 12:00:00 and 13:00:00 ellipse_1 has a size of 300000, and between 13:00:00 and 14:00:00 it has a size of 600000.
Here is how I'm trying to do this in a simple CZML file:
[
{
"id" : "document",
"name" : "name",
"version" : "1.0",
"clock":
{
"interval": "2010-02-04T12:00:00Z/2010-02-04T14:00:00Z",
"currentTime": "2010-02-04T12:00:00Z",
"multiplier": 100
}
},
// I create ellipse_1
{
"id" : "ellipse_1",
"name" : "ellipse_1 (61.0666922, -107.9917071)",
"availability" : "2010-02-04T12:00:00Z/2010-02-04T13:00:00ZZ",
"position" : {
"cartographicDegrees" : [-107.9917071,61.0666922, 0.0]
},
"ellipse" : {
"semiMinorAxis" : 300000,
"semiMajorAxis" : 300000,
"height" : 0.0,
"material" : {
"solidColor" : {
"color" : {
"rgba" :[151,20,150, 255]
}
}
}
}
},
// I reuse the id and only change the values that I want the change
{
"id" : "ellipse_1"
},
"ellipse" : {
"interval" : "2010-02-04T13:00:00Z/2010-02-04T14:00:00ZZ",
"semiMinorAxis" : 600000,
"semiMajorAxis" : 600000
}
}
]
Another option seems to be to do it this way:
{
"id" : "ellipse_1",
"name" : "ellipse_1 (61.0666922, -107.9917071)",
"availability" : "2010-02-04T12:00:00.00Z/2010-02-04T14:00:00.00Z",
"position" : {
"cartographicDegrees" : [-137.9917071,51.0666922, 0.0]
},
"ellipse" : [
{
"interval" : "2010-02-04T12:00:00.00Z/2010-02-04T13:00:00.00Z",
"semiMinorAxis" : 300000,
"semiMajorAxis" : 300000,
"height" : 200000.0,
"material" : {
"solidColor" : {
"color" : {
"rgba" :[253,152,38, 255]
}
}
}
},
{
"interval" : "2010-02-04T13:00:00.00Z/2010-02-04T14:00:00.00Z",
"semiMinorAxis" : 600000,
"semiMajorAxis" : 600000,
"height" : 200000.0,
"material" : {
"solidColor" : {
"color" : {
"rgba" :[253,152,38, 255]
}
}
}
}
]
}
By the way, here is the script to view it:
<script>
var viewer = new Cesium.Viewer('cesiumContainer', {
imageryProvider : new Cesium.ArcGisMapServerImageryProvider({url : 'http://server.arcgisonline.com/ArcGIS/rest/services/World_Street_Map/MapServer'
}),
baseLayerPicker : false,
animation : true,
timeline : true,
});
var dataSourcePromise = Cesium.CzmlDataSource.load('../Apps/oscar.czml');
viewer.dataSources.add(dataSourcePromise);
viewer.flyTo(dataSourcePromise).then(function(result){
});
</script>
Unfortunately, neither of these is the right way to update the values of an object in a CZML file. Am I missing something obvious?
Thanks!

Extracting content from a nested json

I am trying to use the Gson library to parse a JSON file. I want to get a list of the names and URLs of all states within the JSON. I am not able to understand the structure of the JSON object or how to retrieve this data, since any structure I create returns null values. The sample structure of the JSON is:
{
"states" : {
"state53" : {
"name" : "state53",
"url" : "http://cv4a.org/veterans-group-calls-accountability-va-funds-boost/",
"candidateElements" : [ {
"top" : 202,
"left" : 58,
"xpath" : "/HTML[1]/BODY[1]/DIV[2]/DIV[1]/DIV[2]/DIV[1]/ARTICLE[1]/HEADER[1]/P[1]/A[1]",
"width" : 135,
"height" : 20
}, {
"top" : 1307,
"left" : 225,
"xpath" : "/HTML[1]/BODY[1]/DIV[2]/DIV[1]/DIV[2]/DIV[1]/OL[1]/LI[1]/ARTICLE[1]/HEADER[1]/TIME[1]/A[1]",
"width" : 191,
"height" : 22
}, {
"top" : 1374,
"left" : 912,
"xpath" : "/HTML[1]/BODY[1]/DIV[2]/DIV[1]/DIV[2]/DIV[1]/OL[1]/LI[1]/ARTICLE[1]/A[1]",
"width" : 78,
"height" : 38
}, {
"top" : 0,
"left" : 0,
"xpath" : "/HTML[1]/BODY[1]/DIV[2]/DIV[1]/DIV[2]/DIV[1]/SECTION[1]/DIV[1]/P[1]/A[1]",
"width" : 169,
"height" : 18
} ],
"fanIn" : 1,
"fanOut" : 3,
"id" : 53,
"failedEvents" : [ "xpath /HTML[1]/BODY[1]/DIV[2]/DIV[1]/DIV[2]/DIV[1]/SECTION[1]/DIV[1]/P[1]/A[1]" ]
},
"state9" : {
"name" : "state9",
"url" : "http://cv4a.org/blog/#",
"candidateElements" : [ ],
"fanIn" : 1,
"fanOut" : 0,
"id" : 9,
"failedEvents" : [ ]
},
/**
 * Parses the JSON text held in {@code jsonString}, descends into its
 * top-level "states" object, and reads the "name" value of every state
 * entry found there.
 */
public static void main(String[] args) {
    JsonObject root = new JsonParser().parse(jsonString).getAsJsonObject();
    JsonObject states = root.getAsJsonObject("states");
    // Each entry pairs a state key (e.g. "state53") with that state's JSON object.
    for (Map.Entry<String, JsonElement> entry : states.entrySet()) {
        JsonObject stateObj = entry.getValue().getAsJsonObject();
        String name = stateObj.get("name").getAsString();
        //....
    }
}
Or you can create classes (like State, CandidateElement) with fields (name, url, etc.) and use automatic serialization/deserialization. See the Gson documentation.

AngularJS Display value

I have in my .html:
{{data}}
It will display:
[ { "images" : [ { "__v" : 0,
"_id" : "542e57a709d2d60000c93953",
"name" : "image1",
"url" : "http://www.syll.com"
},
{ "__v" : 0,
"_id" : "543249050fcae2f082ca3e70",
"name" : "imageOCR1",
"url_image" : "http://meta-e.aib.uni-linz.ac.at/ocr.gif"
},
{ "__v" : 0,
"_id" : "543249050fcae2f082ca3e71",
"name" : "imageOCR2",
"url_image" : "http://www.textcreationpartnership.org/xxx.jpg"
}
],
"itemCount" : 70,
"pageCount" : 7
} ]
But I would like to display only the value of "pageCount", i.e. 7.
How can I do that without ng-repeat?
By the way, my controller works fine.
I tried:
{{data.pageCount}}
But it doesn't work.
Thanks!
[EDIT] Replaced with the right JSON and formatted it.
It looks like your data is all contained within a single-element array. Have you tried the following?
{{data[0].pageCount}}

How could I monitor mongodb in real-time?

I would like to display real time mongodb monitoring statistics on a website.
I looked at mongostat, but it just does not seem to provide a real time rest API or any sort of json output.
Is there any way I could retrieve real time data from mongostat?
I was thinking about using some tool to stream stdout to a json file, but I thought maybe some of you had another idea.
Thanks in advance!
japel
You've got a couple of options here. You can run mongod with options to expose an HTTP interface that can return some stats in JSON format. Start mongod with the --httpinterface option and it will expose some stats on the instance at a port 1000 higher than your normal access port. So, if I run:
mongod --httpinterface
In addition to getting MongoDB access at localhost:27017, I'll also get this HTTP interface at localhost:28017.
For your use case I'd try hitting the serverStatus endpoint - example call and returned status below:
http://localhost:28017/serverStatus?text=1
{ "host" : "myhost",
"version" : "2.6.0",
"process" : "mongod",
"pid" : { "$numberLong" : "2871" },
"uptime" : 27,
"uptimeMillis" : { "$numberLong" : "27468" },
"uptimeEstimate" : 25,
"localTime" : { "$date" : "2014-05-23T07:25:05.793-0700" },
"asserts" : { "regular" : 0,
"warning" : 0,
"msg" : 0,
"user" : 0,
"rollovers" : 0 },
"backgroundFlushing" : { "flushes" : 0,
"total_ms" : 0,
"average_ms" : 0,
"last_ms" : 0,
"last_finished" : { "$date" : "1969-12-31T16:00:00.000-0800" } },
"connections" : { "current" : 0,
"available" : 26214,
"totalCreated" : { "$numberLong" : "4" } },
"cursors" : { "note" : "deprecated, use server status metrics",
"clientCursors_size" : 0,
"totalOpen" : 0,
"pinned" : 0,
"totalNoTimeout" : 0,
"timedOut" : 0 },
"dur" : { "commits" : 29,
"journaledMB" : 0,
"writeToDataFilesMB" : 0,
"compression" : 0,
"commitsInWriteLock" : 0,
"earlyCommits" : 0,
"timeMs" : { "dt" : 3013,
"prepLogBuffer" : 0,
"writeToJournal" : 0,
"writeToDataFiles" : 0,
"remapPrivateView" : 0 } },
"extra_info" : { "note" : "fields vary by platform",
"page_faults" : 12 },
"globalLock" : { "totalTime" : { "$numberLong" : "27468000" },
"lockTime" : { "$numberLong" : "476591" },
"currentQueue" : { "total" : 0,
"readers" : 0,
"writers" : 0 },
"activeClients" : { "total" : 0,
"readers" : 0,
"writers" : 0 } },
"indexCounters" : { "accesses" : 0,
"hits" : 0,
"misses" : 0,
"resets" : 0,
"missRatio" : 0 },
"locks" : { "." : { "timeLockedMicros" : { "R" : { "$numberLong" : "1104" },
"W" : { "$numberLong" : "476591" } },
"timeAcquiringMicros" : { "R" : { "$numberLong" : "351819" },
"W" : { "$numberLong" : "2178" } } },
"admin" : { "timeLockedMicros" : { "r" : { "$numberLong" : "96" },
"w" : { "$numberLong" : "0" } },
"timeAcquiringMicros" : { "r" : { "$numberLong" : "6" },
"w" : { "$numberLong" : "0" } } },
"local" : { "timeLockedMicros" : { "r" : { "$numberLong" : "6082" },
"w" : { "$numberLong" : "11" } },
"timeAcquiringMicros" : { "r" : { "$numberLong" : "15" },
"w" : { "$numberLong" : "2" } } },
<LIST OF DATABASES REMOVED>
"network" : { "bytesIn" : 0,
"bytesOut" : 0,
"numRequests" : 0 },
"opcounters" : { "insert" : 1,
"query" : 1,
"update" : 0,
"delete" : 0,
"getmore" : 0,
"command" : 2 },
"opcountersRepl" : { "insert" : 0,
"query" : 0,
"update" : 0,
"delete" : 0,
"getmore" : 0,
"command" : 0 },
"recordStats" : { "accessesNotInMemory" : 0,
"pageFaultExceptionsThrown" : 0,
"admin" : { "accessesNotInMemory" : 0,
"pageFaultExceptionsThrown" : 0 },
<LIST OF DATABASES REMOVED>
"writeBacksQueued" : false,
"mem" : { "bits" : 64,
"resident" : 162,
"virtual" : 5762,
"supported" : true,
"mapped" : 1616,
"mappedWithJournal" : 3232 },
"metrics" : { "cursor" : { "timedOut" : { "$numberLong" : "0" },
"open" : { "noTimeout" : { "$numberLong" : "0" },
"pinned" : { "$numberLong" : "0" },
"total" : { "$numberLong" : "0" } } },
"document" : { "deleted" : { "$numberLong" : "0" },
"inserted" : { "$numberLong" : "1" },
"returned" : { "$numberLong" : "0" },
"updated" : { "$numberLong" : "0" } },
"getLastError" : { "wtime" : { "num" : 0,
"totalMillis" : 0 },
"wtimeouts" : { "$numberLong" : "0" } },
"operation" : { "fastmod" : { "$numberLong" : "0" },
"idhack" : { "$numberLong" : "0" },
"scanAndOrder" : { "$numberLong" : "0" } },
"queryExecutor" : { "scanned" : { "$numberLong" : "0" },
"scannedObjects" : { "$numberLong" : "0" } },
"record" : { "moves" : { "$numberLong" : "0" } },
"repl" : { "apply" : { "batches" : { "num" : 0,
"totalMillis" : 0 },
"ops" : { "$numberLong" : "0" } },
"buffer" : { "count" : { "$numberLong" : "0" },
"maxSizeBytes" : 268435456,
"sizeBytes" : { "$numberLong" : "0" } },
"network" : { "bytes" : { "$numberLong" : "0" },
"getmores" : { "num" : 0,
"totalMillis" : 0 },
"ops" : { "$numberLong" : "0" },
"readersCreated" : { "$numberLong" : "0" } },
"preload" : { "docs" : { "num" : 0,
"totalMillis" : 0 },
"indexes" : { "num" : 0,
"totalMillis" : 0 } } },
"storage" : { "freelist" : { "search" : { "bucketExhausted" : { "$numberLong" : "0" },
"requests" : { "$numberLong" : "0" },
"scanned" : { "$numberLong" : "0" } } } },
"ttl" : { "deletedDocuments" : { "$numberLong" : "0" },
"passes" : { "$numberLong" : "0" } } },
"ok" : 1 }
A second option is to enable the REST API interface for mongod with the --rest option. Documentation on it is here: http://docs.mongodb.org/ecosystem/tools/http-interfaces/ . However, it opens some security holes and is not recommended for production use: http://docs.mongodb.org/manual/core/security-interface/