Collecting Data from Facebook posts using JPath / JSONPath - json

I have been at this for hours. How do you get filtering to work?
None of the solutions online works.
All I want to do is grab the comments(highlighted by these <<<>>>) from the json below from the name “Tori Smith”. I’ve been using this app to test http://jsonpath.curiousconcept.com/.
This is as far as I have gotten: ‘comments.data..from.id’
Data:
{
"id":"12029930209393029_10100748134340048",
"from":{
"id":"12029930209393029",
"name":"Tori Smith"
},
"message":"Buy this now",
"picture":"https:\/\/fbexternal-a.akamaihd.net\/app_full_proxy.php?app=141861192518680&v=1&size=z&cksum=a0471c1f5895cd22c74474fabc989c7e&src=http%3A%2F%2Fmedia3.policymic.com%2FYTM2OWUwN2Q0MSMvRnlpUTkxZU9DMWtGWFZ6TUNiYWh3RkxveXRjPS8yeDE6MTI4Nng2MjIvMTI4MHg2MjAvZmlsdGVyczpxdWFsaXR5KDcwKS9odHRwOi8vczMuYW1hem9uYXdzLmNvbS9wb2xpY3ltaWMtaW1hZ2VzL2JqNTdvbTZxZGd1N3ZpaGtvcWVrNnlzaTI5bW55dGZqanEwMWhuc3FqYjgxc3dkeGcyN2F6czV0eXV0bWJzZTguanBn.jpg",
"link":"http:\/\/mic.com\/articles\/101252\/this-tiny-box-will-let-you-stay-anonymous-on-the-internet",
"name":"This Tiny Box Will Let You Stay Anonymous on the Internet",
"caption":"Mic",
"description":"A simple and elegant solution to a major technology problem.",
"icon":"https:\/\/fbcdn-photos-d-a.akamaihd.net\/hphotos-ak-xpf1\/t39.2081-0\/10333103_752719651432828_1597152122_n.png",
"actions":[
{
"name":"Comment",
"link":"https:\/\/www.facebook.com\/12029930209393029\/posts\/10100748134340048"
},
{
"name":"Like",
"link":"https:\/\/www.facebook.com\/12029930209393029\/posts\/10100748134340048"
}
],
"privacy":{
"description":"Your friends",
"value":"ALL_FRIENDS",
"friends":"",
"networks":"",
"allow":"",
"deny":""
},
"type":"link",
"status_type":"app_created_story",
"application":{
"name":"Mic",
"namespace":"micmediaapp",
"id":"141861192518680"
},
"created_time":"2014-10-14T14:54:54+0000",
"updated_time":"2014-10-15T03:55:19+0000",
"comments":{
"data":[
{
"id":"10100748134340048_10100748984636048",
"from":{
"id":"123094958239849866",
"name":"Don Draper"
},
"message":"Even if I use Tor",
"can_remove":true,
"created_time":"2014-10-15T03:03:29+0000",
"like_count":0,
"user_likes":false
},
{
"id":"10100748134340048_10100749036726658",
"from":{
"id":"12029930209393029",
"name":"Tori Smith"
},
<<< "message":"Yes this can go with you and I think it works for all apps outside of TOR browser", >>>
"can_remove":true,
"created_time":"2014-10-15T03:55:19+0000",
"like_count":0,
"user_likes":false
},
{
"id":"10100748134340048_1010074901234658",
"from":{
"id":"12029930209393029",
"name":"Tori Smith"
},
<<< "message":"Second Text", >>>
"can_remove":true,
"created_time":"2014-10-15T03:55:19+0000",
"like_count":0,
"user_likes":false
}
],
"paging":{
"cursors":{
"after":"WTI5dGJXVnVkRjlqZFhKemIzSTZNVEF4TURBM05Ea3dNelkzTWpZMk5UZzZNVFF4TXpNME5UTXhPVG95",
"before":"WTI5dGJXVnVkRjlqZFhKemIzSTZNVEF4TURBM05EZzVPRFEyTXpZd05EZzZNVFF4TXpNME1qSXdPVG94"
}
}
}
}

To get at the comments you need to introduce a filter expression for filtering to comments by "Tori Smith" and then select the comment field.
Taking it step by step, to get all the comments, you need this:
$.comments.data
Then to filter to only the comments by "Tori Smith" add a filter like this:
$.comments.data[?(@.from.name == 'Tori Smith')]
Finally, to select only the message expand the query like this:
$.comments.data[?(@.from.name == 'Tori Smith')].message
I have tested this using the online JSON query tool here:
http://www.jsonquerytool.com/sample/jsonpathwhereselectcomments

Related

Need to get a value from a specific nested JSON element using MySQL

Answers to a questionnaire are stored in json in a column in this table. I need to pull out the answer to a specific question stored within this column, but am struggling to do so.
I want to get the postValue '123456789' as 'Incorporation Number'
I am not sure how to get the specific child element value - any help much appreciated.
select json_extract(table.json_column, '$.user_data.element.postValue') as 'json'
from table
where table.id = 123
the json looks something like the below:
{
"risk_data":[
],
"user_data":[
"elements":[
{
"postName":"postNameInput",
"postValue":"TEST COMPANY"
}
],
"language":"english",
"elementId":"100",
"sectionId":"9",
"weightage":null,
"isHyperLink":0,
"elementIndex":0,
"elementTitle":{
"english":"Company's Full Legal Name"
},
"hyperLinkUrl":null,
"isWorkFlowNa":0,
"sectionIndex":0,
"sectionTitle":{
"english":"Basic Company Information"
},
"hyperLinkText":null,
"isMultiSelect":0,
"selectedChart":null,
"dataAttributes":null,
"elementTitleAbb":{
"english":"Company's Full Legal Name"
},
"isHiddenElement":0,
"isHiddenSection":0,
"elementInputType":"input",
"elementFooterText":null,
"elementHeaderText":null,
"elementDescription":null,
"hyperLinkTextAfter":null,
"question_abb_table":null,
"hyperLinkTextBefore":null,
"enableSelfReportingForManageThirdParty":0,
"enableSelfReportingForThirdPartyProfile":0
}, {
"elements":[
{
"postName":"postNameInput",
"postValue":"123456789"
}
],
"language":"english",
"elementId":"101",
"sectionId":"10",
"weightage":null,
"isHyperLink":0,
"elementIndex":4,
"elementTitle":{
"english":"Incorporation Number"
},
"hyperLinkUrl":null,
"isWorkFlowNa":0,
"sectionIndex":3,
"sectionTitle":{
"english":"Organisational Structure"
},
"hyperLinkText":null,
"isMultiSelect":0,
"selectedChart":null,
"dataAttributes":null,
"elementTitleAbb":{
"english":"Incorporation Number"
},
"isHiddenElement":0,
"isHiddenSection":0,
"elementInputType":"input",
"elementFooterText":null,
"elementHeaderText":null,
"elementDescription":null,
"hyperLinkTextAfter":null,
"question_abb_table":null,
"hyperLinkTextBefore":null,
"enableSelfReportingForManageThirdParty":0,
"enableSelfReportingForThirdPartyProfile":0
},

Mongo query to get comma separated value

I have a query which is traversing only in the forward direction.
example:
{
"orderStatus": "SUBMITTED",
"orderNumber": "785654",
"orderLine": [
{
"lineNumber": "E1000",
"trackingnumber": "12345,67890",
"lineStatus": "IN-PROGRESS",
"lineStatusCode": 50
}
],
"accountNumber": 9076
}
find({'orderLine.trackingNumber' : { $regex: "^12345.*"} })
When I use the above query I get the entire document. But I want to fetch the document when I search with 67890 value as well
At any part of time I will be always querying with single tracking number only.
I will search with either 12345 or 67890. The tracking number value may also grow to hold more entries, e.g. 12345,56789,01234,56678.
I need to pull the whole document no matter what the tracking number is in whatever position.
OUTPUT
should be whole document
{
"orderStatus": "SUBMITTED",
"orderNumber": "785654",
"orderLine": [
{
"lineNumber": "E1000",
"trackingnumber": "12345,67890",
"lineStatus": "IN-PROGRESS",
"lineStatusCode": 50
}
],
"accountNumber": 9076
}
Also I have done indexing for trackingNumber field. Need help here. Thanks in advance.
Following will search with either 12345 or 67890. It is similar to like condition
find({'orderLine.trackingNumber' : { $regex: /12345/} })
find({'orderLine.trackingNumber' : { $regex: /67890/} })
There's also an alternative way to do this
Create a text index
db.order.createIndex({'orderLine.trackingnumber':"text"})
You can make use of this index to search the value from trackingnumber field
db.order.find({$text:{$search:'12345'}})
--
db.order.find({$text:{$search:'67890'}})
--
//Do take note that you can't search using few in between characters
//like the following query won't give any result..
db.order.find({$text:{$search:'6789'}}) //have purposefully removed 0
To further understand how $text searches work, please go through the following link.

Elastic search, watcher access dotted field names in the result set

I created a query for an Elasticsearch watcher setup. The result set looks like this:
"_index": "transaction_broker-2017.09.15",
"_type": "transaction_broker",
"_id": "AV6Fn_UQ9KbnKce40avY",
"_score": 3.8539968,
"_source": {
"tbroker.workitem.sync_check.tbroker_value": 7000,
"source": "/logs/web/tomcat/tbroker.log.json",
"type": "transaction_broker",
"tbroker.job.instance_id": "lixporta-p00.xxxxxxx.15053054001381505305457198",
"tbroker.workitem.sync_check.backend_total_value": 6995,
"tbroker.appversion": "1.1.135-180",
"#version": 1,
"beat": {
"hostname": "lixporta-p00",
"name": "lixporta-p00",
"version": "5.1.1"
In the action section, I can access the fields by using:
"actions": {
"my-logging-action": {
"logging": {
"text": "There are {{ctx.payload.hits.hits.0._source.....
After the source tag, I use for example the "type" field from the list above. Other example is:
"ctx.payload.hits.hits.0._source.beat.hostname"
This works pretty fine...
But it is not possible to use a field like
"tbroker.workitem.sync_check.tbroker_value"
The parser thinks that these fields are nested, but this is only a single field name with dots in it.
Is there any possibility to "escape" this field name?
Has anyone else had this problem?
Many thanks & best regards
Claus
I think the following should work:
{{#ctx.payload.hits.hits.0._source}}{{tbroker.workitem.sync_check.tbroker_value}}{{/ctx.payload.hits.hits.0._source}}
It is a limitation of Mustache and this is a workaround.
Another example may help - when in a context looping through hits (I have added // comments purely for clarity - they aren't valid Mustache syntax & should be removed):
{{#ctx.payload.hits.hits}}
// This works fine
{{_source.foo}}
// Not working if each hit's _source contains "bar.baz", not nested "bar">"baz"
{{_source.bar.baz}}
{{/ctx.payload.hits.hits}}
Applying the same workaround by adding an extra context/section:
{{#ctx.payload.hits.hits}}
// Put us in the context of [the current hit] > _source
{{#_source}}
// Now both of these work...
{{foo}}
// ...including this one containing a dot (yay!)
{{bar.baz}}
{{/_source}}
{{/ctx.payload.hits.hits}}
There is no way to directly access source fields that have dots in them, but if you apply a transform like this:
"transform": {
"script": {
"inline": "return [ 'host' : ctx.payload.hits.hits[0]._source.host, 'tbroker_value' : ctx.payload.hits.hits[0]._source['tbroker.workitem.sync_check.tbroker_value']]",
"lang": "painless"
}
}
and then you can use {{ctx.payload.host}} and {{ctx.payload.tbroker_value}} in your action.

What is the regex to find part of JSON

I have this JSON part:
{
"destination_addresses":[
"8000 AA Zwolle, Nederland"
],
"origin_addresses":[
"8100 AA Heino, Nederland"
],
"rows":[
{
"elements":[
{
"distance":{
"text":"14,6 km",
"value":14555
},
"duration":{
"text":"17 min.",
"value":1022
},
"status":"OK"
}
]
}
],
"status":"OK"
}
I want the regex to find the value 14555 and the value 1022 but not by searching for this numbers (because the numbers always change) but by searching by the node (value in distance and value in duration)
Any ideas?
"distance" : {.*"value" : ([0-9]+) (,.*)*}
Do you mean something like this? If you really need a RegExp to do this.
I don't know the language you use. But in most languages, you can just parse the JSON to an object. Then you can get that easily.
For example, in JavaScript, var obj = JSON.parse(str); may fit your demand.

How does Simulating Joins works in Couchbase?

I have two documents, one of which depends on the other. The first:
{
"doctype": "closed_auctions",
"seller": {
"person": "person11304"
},
"buyer": {
"person": "person0"
},
"itemref": {
"item": "item1"
},
"price": 50.03,
"date": "11/17/2001",
"quantity": 1,
"type": "Featured",
"annotation": {
"author": {
"person": "person8597"
}
}
}
Here you can see that doc.buyer.person refers to another document, like this:
{
"doctype": "people",
"id": "person0",
"name": "Kasidit Treweek",
"profile": {
"income": 20186.59,
"interest": [
{
"category": "category251"
}
],
"education": "Graduate School",
"business": "No"
},
"watch": [
{
"open_auction": "open_auction8747"
}
]
}
How can I get the buyer's name from these two documents? I mean that doc.buyer.person is connected to the second document's id. It is a join, and the documentation does not make it clear how to do this. http://docs.couchbase.com/couchbase-manual-2.0/#solutions-for-simulating-joins
Well, first off, let me point out that the very first sentence of the documentation section that you referenced says (I added the emphasis):
Joins between data, even when the documents being examined are
contained within the same bucket, are not possible directly within the
view system.
So, the quick answer to your question is that you have lots of options. Here are a few of them:
Assume you need only the name for a rather small subset of people. Create a view that outputs the PersonId as key and Name as value, then query the view for a specific name each time you need it.
Assume you need many people joined to many auctions. Download the full contents of the basic index from #1 and execute the join using linq.
Assume you need many properties of the person, not just the name. Download the Person document for each auction item.
Assume you need a small subset from both Auction and People. Index the fields from each that you need, include a type field, and emit all of them under the key of the Person. You will be able to query the view for all items belonging to the person.
The last approach was used in the example you linked to in your question. For performance, it will be necessary to tailor the approach to your usage scenario.
Another solution consists of merging the data in a custom reduce function.
// view
function (doc, meta) {
if (doc.doctype === "people") {
emit(doc.id, doc);
}
if (doc.doctype === "closed_auctions") {
emit(doc.buyer.person, doc);
}
}
// custom reduce
function (keys, values, rereduce) {
var peoples = values.filter(function (doc) {
return doc.doctype === "people";
});
for (var key in peoples) {
var people = peoples[key];
people.closed_auctions = (function (peopleId) {
return values.filter(function (doc) {
return doc.doctype === "closed_auctions" && doc.buyer.person === peopleId;
});
})(people.id);
}
return peoples;
}
And then you can query one user with "key" or multiple users with "keys".
That said, I don't know what the performance issues of this method are.