Related
I have an extract of tweets in JSON format. I have attached an sample of the data. I would need to convert this JSON into a dataframe.
So far I managed to convert it using the "jsonlite" package:
json_data <- jsonlite::stream_in(file("myjsonfile.txt"))
But it does not load all the information contained in the tweets. For example I only see the user who retweeted but not who posted the tweet.
You can view the json file better using this website by copy pasting the file and selecting format: http://jsonviewer.stack.hu/
The data is coming from the Twitter API (more information on this data available here: https://dev.twitter.com/overview/api/tweets
Thank you in advance for your time and help.
ML_Enthousiast
{"favorited": false, "in_reply_to_status_id_str": null, "in_reply_to_user_id": null, "truncated": false, "in_reply_to_user_id_str": null, "coordinates": null, "retweeted": false, "text": "RT #Antoniotalks: Revenue streams for #OpenData companies!\n#Cloud #StartUp #SMM #AI #IoT #Fintech #BigData #deeplearning #Mpgvip\u2026 ", "retweet_count": 0, "filter_level": "low", "created_at": "Thu Jun 29 18:47:18 +0000 2017", "favorite_count": 0, "retweeted_status": {"favorited": false, "in_reply_to_status_id_str": null, "in_reply_to_user_id": null, "display_text_range": [0, 140], "truncated": true, "in_reply_to_user_id_str": null, "coordinates": null, "retweeted": false, "text": "Revenue streams for #OpenData companies!\n#Cloud #StartUp #SMM #AI #IoT #Fintech #BigData #deeplearning #Mpgvip\u2026 ", "retweet_count": 38, "filter_level": "low", "created_at": "Wed Jun 28 12:45:08 +0000 2017", "favorite_count": 48, "in_reply_to_screen_name": null, "extended_tweet": {"extended_entities": {"media": [{"media_url_https": "", "sizes": {"thumb": {"w": 150, "h": 150, "resize": "crop"}, "large": {"w": 1200, "h": 927, "resize": "fit"}, "medium": {"w": 1200, "h": 927, "resize": "fit"}, "small": {"w": 680, "h": 525, "resize": "fit"}}, "type": "photo", "expanded_url": "", "id": 880044388679901184, "media_url": "http://pbs.twimg.com/media/DDaLtXXXYAAI2eM.jpg", "id_str": "880044388679901184", "display_url": "pic.twitter.com/aw9HeukUYv", "indices": [139, 162], "url": ""}]}, "full_text": "Revenue streams for #OpenData companies!\n#Cloud #StartUp #SMM #AI #IoT #Fintech #BigData #deeplearning #Mpgvip #defstar5 #DataScience #CIO ", "entities": {"user_mentions": [], "hashtags": [{"text": "OpenData", "indices": [20, 29]}, {"text": "Cloud", "indices": [41, 47]}, {"text": "StartUp", "indices": [48, 56]}, {"text": "SMM", "indices": [57, 61]}, {"text": "AI", "indices": [62, 65]}, {"text": "IoT", "indices": [66, 70]}, {"text": "Fintech", "indices": [71, 79]}, {"text": "BigData", "indices": [80, 88]}, {"text": "deeplearning", "indices": [89, 102]}, {"text": "Mpgvip", "indices": [103, 110]}, {"text": "defstar5", "indices": [111, 120]}, {"text": "DataScience", "indices": [121, 133]}, {"text": "CIO", "indices": [134, 138]}], "media": [{"media_url_https": "", "sizes": {"thumb": {"w": 150, "h": 150, "resize": "crop"}, "large": {"w": 1200, "h": 927, "resize": "fit"}, "medium": {"w": 1200, "h": 927, "resize": "fit"}, "small": {"w": 680, "h": 525, "resize": "fit"}}, "type": "photo", "expanded_url": "", "id": 880044388679901184, "media_url": "", "id_str": "880044388679901184", "display_url": "pic.twitter.com/aw9HeukUYv", "indices": [139, 162], "url": ""}], "symbols": [], "urls": []}, "display_text_range": [0, 138]}, "in_reply_to_status_id": null, "source": "Buffer", "id_str": "880044392110796800", "entities": {"user_mentions": [], "hashtags": [{"text": "OpenData", "indices": [20, 29]}, {"text": "Cloud", "indices": [41, 47]}, {"text": "StartUp", "indices": [48, 56]}, {"text": "SMM", "indices": [57, 61]}, {"text": "AI", "indices": [62, 65]}, {"text": "IoT", "indices": [66, 70]}, {"text": "Fintech", "indices": [71, 79]}, {"text": "BigData", "indices": [80, 88]}, {"text": "deeplearning", "indices": [89, 102]}, {"text": "Mpgvip", "indices": [103, 110]}], "symbols": [], "urls": [{"display_url": "twitter.com/i/web/status/8\u2026", "indices": [112, 135], "expanded_url": "", "url": "8H"}]}, "lang": "en", "id": 880044392110796800, "is_quote_status": false, "geo": null, "user": {"screen_name": "Antoniotalks", "profile_background_image_url": "", "profile_image_url": "jpg", "follow_request_sent": null, "profile_background_tile": false, "id": 2445890839, "is_translator": false, "description": "A father & CEO of Recruitd (#imrecruitd). Helping companies magnify their #employer and #recruitment #brand and #jobseekers with the #skillstosucceed.", "listed_count": 198, "favourites_count": 398, "created_at": "Tue Apr 15 19:13:52 +0000 2014", "notifications": null, "profile_background_image_url_https": "https://abs.twimg.com/images/themes/theme1/bg.png", "contributors_enabled": false, "profile_background_color": "C0DEED", "following": null, "friends_count": 6792, "protected": false, "default_profile": true, "profile_use_background_image": true, "name": "Antonio Giugno", "location": "London, England", "geo_enabled": true, "id_str": "2445890839", "utc_offset": -25200, "profile_banner_url": "0", "profile_text_color": "333333", "lang": "en-gb", "statuses_count": 4058, "profile_sidebar_fill_color": "DDEEF6", "default_profile_image": false, "profile_image_url_https": "4433/dVeGYfTX_normal.jpg", "profile_link_color": "1DA1F2", "url": "rnontubein", "verified": false, "profile_sidebar_border_color": "C0DEED", "followers_count": 6323, "time_zone": "Pacific Time (US & Canada)"}, "contributors": null, "possibly_sensitive": false, "place": null}, "in_reply_to_screen_name": null, "timestamp_ms": "1498762038396", "in_reply_to_status_id": null, "source": "Mobile Web (M2)", "id_str": "880497923150286848", "entities": {"user_mentions": [{"screen_name": "Antoniotalks", "id": 2445890839, "id_str": "2445890839", "name": "Antonio Giugno", "indices": [3, 16]}], "hashtags": [{"text": "OpenData", "indices": [38, 47]}, {"text": "Cloud", "indices": [59, 65]}, {"text": "StartUp", "indices": [66, 74]}, {"text": "SMM", "indices": [75, 79]}, {"text": "AI", "indices": [80, 83]}, {"text": "IoT", "indices": [84, 88]}, {"text": "Fintech", "indices": [89, 97]}, {"text": "BigData", "indices": [98, 106]}, {"text": "deeplearning", "indices": [107, 120]}, {"text": "Mpgvip", "indices": [121, 128]}], "symbols": [], "urls": [{"indices": [130, 130], "expanded_url": null, "url": ""}]}, "lang": "en", "id": 880497923150286848, "is_quote_status": false, "geo": null, "user": {"screen_name": "henrymbuguak", "profile_background_image_url": "://abs.twimg.com/images/themes/theme3/bg.gif", "profile_image_url": "://pbs.twimg.com/profile_images/822772556818239489/0yTbHCGj_normal.jpg", "follow_request_sent": null, "profile_background_tile": false, "id": 310697279, "is_translator": false, "description": "I enjoy coding. Visit my github project: :// ://github.com/henrymbuguak", "listed_count": 62, "favourites_count": 978, "created_at": "Sat Jun 04 05:55:09 +0000 2011", "notifications": null, "profile_background_image_url_https": "://abs.twimg.com/images/themes/theme3/bg.gif", "contributors_enabled": false, "profile_background_color": "EDECE9", "following": null, "friends_count": 2540, "protected": false, "default_profile": false, "profile_use_background_image": true, "name": "kiarie henry mbugua", "location": "Njoro, Kenya.", "geo_enabled": false, "id_str": "310697279", "utc_offset": 10800, "profile_banner_url": "://pbs.twimg.com/profile_banners/310697279/1484999353", "profile_text_color": "634047", "lang": "en", "statuses_count": 3775, "profile_sidebar_fill_color": "E3E2DE", "default_profile_image": false, "profile_image_url_https": "//pbs.twimg.com/profile_images/822772556818239489/0yTbHCGj_normal.jpg", "profile_link_color": "088253", "url": null, "verified": false, "profile_sidebar_border_color": "D3D2CF", "followers_count": 2141, "time_zone": "Nairobi"}, "contributors": null, "place": null}
If I read in your data using
indata <- jsonlite::read_json("myjsonfile.json")
then I get all the information contained in the JSON file. It is a nested list so you may need to extract the information you want from one of the elements in the list
> names(indata)
[1] "favorited" "in_reply_to_status_id_str"
[3] "in_reply_to_user_id" "truncated"
[5] "in_reply_to_user_id_str" "coordinates"
[7] "retweeted" "text"
[9] "retweet_count" "filter_level"
[11] "created_at" "favorite_count"
[13] "retweeted_status" "in_reply_to_screen_name"
[15] "timestamp_ms" "in_reply_to_status_id"
[17] "source" "id_str"
[19] "entities" "lang"
[21] "id" "is_quote_status"
[23] "geo" "user"
[25] "contributors" "place"
The information about the user is for example (only a part shown)
> indata$user
$screen_name
[1] "henrymbuguak"
$profile_background_image_url
[1] "://abs.twimg.com/images/themes/theme3/bg.gif"
$profile_image_url
[1] "://pbs.twimg.com/profile_images/822772556818239489/0yTbHCGj_normal.jpg"
$follow_request_sent
NULL
$profile_background_tile
[1] FALSE
$id
[1] 310697279
so you can get the user with indata$user$screen_name
I have a nested JSON. I want it to be read in pandas in order to explore it, but I got errors. When to use read_json method, I got: "Trailing data". It is valid JSON. How to read it in pd? (Tried differently, but did not work). It looks like this:
{
"contributors": null,
"coordinates": null,
"created_at": "Fri May 26 08:54:00 +0000 2017",
"entities": {
"hashtags": [],
"media": [
{
"display_url": "pic.twitter.com/Pm28ORTePl",
"expanded_url": "",
"id": 868027417121751040,
"id_str": "868027417121751040",
"indices": [
94,
117
],
"media_url": "",
"sizes": {
"large": {
"h": 404,
"resize": "fit",
"w": 773
},
"medium": {
"h": 404,
"resize": "fit",
"w": 773
},
"small": {
"h": 355,
"resize": "fit",
"w": 680
},
"thumb": {
"h": 150,
"resize": "crop",
"w": 150
}
},
"type": "photo",
"url": ""
}
],
"symbols": [],
"urls": [
{
"display_url": "",
"expanded_url": "",
"indices": [
70,
93
],
"url": ""
}
],
"user_mentions": []
},
"extended_entities": {
"media": [
{
"display_url": "pic.twitter.com/Pm28ORTePl",
"expanded_url": "1",
"id": 868027417121751040,
"id_str": "868027417121751040",
"indices": [
94,
117
],
"media_url": "",
"media_url_https": "",
"sizes": {
"large": {
"h": 404,
"resize": "fit",
"w": 773
},
"medium": {
"h": 404,
"resize": "fit",
"w": 773
},
"small": {
"h": 355,
"resize": "fit",
"w": 680
},
"thumb": {
"h": 150,
"resize": "crop",
"w": 150
}
},
"type": "photo",
"url": ""
}
]
},
"favorite_count": 1,
"favorited": false,
"geo": null,
"id": 868027425757724672,
"id_str": "868027425757724672",
"in_reply_to_screen_name": null,
"in_reply_to_status_id": null,
"in_reply_to_status_id_str": null,
"in_reply_to_user_id": null,
"in_reply_to_user_id_str": null,
"is_quote_status": false,
"lang": "ru",
"place": null,
"possibly_sensitive": false,
"retweet_count": 0,
"retweeted": false,
"source": "Twitter Web Client",
"text": "\u041f\u0440\u043e\u043f\u0430\u0432\u0448\u0430\u044f \u0432 \u041a\u043e\u043a\u0448\u0435\u0442\u0430\u0443 \u0448\u043a\u043e\u043b\u044c\u043d\u0438\u0446\u0430 \u0436\u0438\u043b\u0430 \u0432 \u0437\u0430\u0431\u0440\u043e\u0448\u0435\u043d\u043d\u043e\u043c \u0434\u043e\u043c\u0435 \u0438 \u0431\u0440\u043e\u0434\u044f\u0436\u043d\u0438\u0447\u0430\u043b\u0430\n",
"truncated": false,
"user": {
"contributors_enabled": false,
"created_at": "Wed May 18 11:59:50 +0000 2011",
"default_profile": true,
"default_profile_image": false,
"description": "\u041a\u0430\u0437\u0430\u0445\u0441\u0442\u0430\u043d\u0441\u043a\u0438\u0439 \u0438\u043d\u0442\u0435\u0440\u043d\u0435\u0442-\u043f\u043e\u0440\u0442\u0430\u043b",
"entities": {
"description": {
"urls": []
},
"url": {
"urls": [
{
"display_url": "",
"expanded_url": "",
"indices": [
0,
22
],
"url": ""
}
]
}
},
"favourites_count": 87,
"follow_request_sent": false,
"followers_count": 17989,
"following": true,
"friends_count": 98,
"geo_enabled": true,
"has_extended_profile": false,
"id": 300811189,
"id_str": "300811189",
"is_translation_enabled": false,
"is_translator": false,
"lang": "ru",
"listed_count": 86,
"location": "\u0410\u043b\u043c\u0430\u0442\u044b",
"name": "",
"notifications": false,
"profile_background_color": "C0DEED",
"profile_background_image_url": "http://abs.twimg.com/images/themes/theme1/bg.png",
"profile_background_image_url_https": "https://abs.twimg.com/images/themes/theme1/bg.png",
"profile_background_tile": false,
"profile_banner_url": "https://pbs.twimg.com/profile_banners/300811189/1489117916",
"profile_image_url": "http://pbs.twimg.com/profile_images/840047424882298881/NxZSyfhM_normal.jpg",
"profile_image_url_https": "https://pbs.twimg.com/profile_images/840047424882298881/NxZSyfhM_normal.jpg",
"profile_link_color": "1DA1F2",
"profile_sidebar_border_color": "C0DEED",
"profile_sidebar_fill_color": "DDEEF6",
"profile_text_color": "333333",
"profile_use_background_image": true,
"protected": false,
"screen_name": "",
"statuses_count": 53011,
"time_zone": "Quito",
"translator_type": "none",
"url": "",
"utc_offset": -18000,
"verified": false
}
}
Sorry, but your JSON is actually not valid, despite your saying it is.
This line:
"media_url": "": "",
Should probably be:
"media_url": "",
At which point, when I added the final bracket } that was outside of your code block, validated as properly formed JSON.
I have a number of large json files that I need to be able to read quickly in the terminal. I am interested accessing attributes in the last element of each json, which, for example, might look like this:
},
"source": "Twitter for Android",
"text": "RT #kosaqsi_tweets: Ah ah..!!!",
"truncated": false,
"user": {
"contributors_enabled": false,
"created_at": "Tue May 06 04:48:07 +0000 2014",
"default_profile": true,
"default_profile_image": false,
"description": "",
"entities": {
"description": {
"urls": []
}
},
"favourites_count": 2147,
"follow_request_sent": false,
"followers_count": 72,
"following": false,
"friends_count": 207,
"geo_enabled": true,
"has_extended_profile": false,
"id": 2479274491,
"id_str": "2479274491",
"is_translation_enabled": false,
"is_translator": false,
"lang": "en",
"listed_count": 1,
"location": "Singapore",
"name": "karthikeyan vedalam",
"notifications": false,
"profile_background_color": "C0DEED",
"profile_background_image_url": "http://abs.twimg.com/images/themes/theme1/bg.png",
"profile_background_image_url_https": "https://abs.twimg.com/images/themes/theme1/bg.png",
"profile_background_tile": false,
"profile_banner_url": "https://pbs.twimg.com/profile_banners/2479274491/1453298552",
"profile_image_url": "http://pbs.twimg.com/profile_images/748055063625605120/rByPUFsn_normal.jpg",
"profile_image_url_https": "https://pbs.twimg.com/profile_images/748055063625605120/rByPUFsn_normal.jpg",
"profile_link_color": "0084B4",
"profile_sidebar_border_color": "C0DEED",
"profile_sidebar_fill_color": "DDEEF6",
"profile_text_color": "333333",
"profile_use_background_image": true,
"protected": false,
"screen_name": "k84362172",
"statuses_count": 1521,
"time_zone": null,
"url": null,
"utc_offset": null,
"verified": false
}
}
]
What would be the proper command in terminal to access the last element of the json file and to see, for example, the value of attribute "geo_enabled" ?
The jq command may be your friend.
At a pinch:
jq .user.geo_enabled
It'll be a slightly longer path, given your JSON structure. Probably something like:
jq .[0].parent.user.geo_enabled
(Where 'parent' is whatever is 'above' your user key).
I am trying to get direct message from twitter using twitter api. I got the api json array respose like
[{
"id": 694476444991229955,
"id_str": "694476444991229955",
"text": "Got it",
"sender": {
"id": 1690262984,
"id_str": "1690262984",
"name": "Ashok Kumar T",
"screen_name": "Ashok_kumar_T",
"location": "Trivandrum",
"description": "",
"url": null
},
"protected": false,
"followers_count": 68,
"friends_count": 32,
"listed_count": 0,
"created_at": "Thu Aug 22 06:52:53 +0000 2013",
"favourites_count": 5,
"utc_offset": 19800,
"time_zone": "Chennai",
"geo_enabled": true,
"verified": false,
"statuses_count": 124,
"lang": "en",
"contributors_enabled": false,
"is_translator": false,
"is_translation_enabled": false,
"profile_background_color": "131516",
"profile_background_image_url": "http:\/\/abs.twimg.com\/images\/themes\/theme14\/bg.gif",
"profile_background_image_url_https": "https:\/\/abs.twimg.com\/images\/themes\/theme14\/bg.gif",
"profile_background_tile": true,
"profile_image_url": "http:\/\/pbs.twimg.com\/profile_images\/378800000337984382\/5eded5a0c6fda4a85511aff15e5befd9_normal.jpeg",
"profile_image_url_https": "https:\/\/pbs.twimg.com\/profile_images\/378800000337984382\/5eded5a0c6fda4a85511aff15e5befd9_normal.jpeg",
"profile_banner_url": "https:\/\/pbs.twimg.com\/profile_banners\/1690262984\/1429709252",
"profile_link_color": "009999",
"profile_sidebar_border_color": "EEEEEE",
"profile_sidebar_fill_color": "EFEFEF",
"profile_text_color": "333333",
"profile_use_background_image": true,
"has_extended_profile": false,
"default_profile": false,
"default_profile_image": false,
"following": true,
"follow_request_sent": false,
"notifications": false
}]
I have successfully parsed the json.But I cant get the particular data from parsed json. Here my play golang link http://play.golang.org/p/zS42Qws2Di
package main
import (
"encoding/json"
"fmt"
)
type PublicKey struct {
ID int64
ID_STR string
Text string
SENDER struct {
ID int64
ID_STR string
NAME string
}
PROTECTED bool
FOLLOWERS_COUNT int
FRIENDS_COUNT int
LISTED_COUNT int
}
type KeysResponse struct {
Collection []PublicKey
}
func main() {
s := `[{
"id": 694476444991229955,
"id_str": "694476444991229955",
"text": "Got it",
"sender": {
"id": 1690262984,
"id_str": "1690262984",
"name": "Ashok Kumar T",
"screen_name": "Ashok_kumar_T",
"location": "Trivandrum",
"description": "",
"url": null
},
"protected": false,
"followers_count": 68,
"friends_count": 32,
"listed_count": 0,
"created_at": "Thu Aug 22 06:52:53 +0000 2013",
"favourites_count": 5,
"utc_offset": 19800,
"time_zone": "Chennai",
"geo_enabled": true,
"verified": false,
"statuses_count": 124,
"lang": "en",
"contributors_enabled": false,
"is_translator": false,
"is_translation_enabled": false,
"profile_background_color": "131516",
"profile_background_image_url": "http:\/\/abs.twimg.com\/images\/themes\/theme14\/bg.gif",
"profile_background_image_url_https": "https:\/\/abs.twimg.com\/images\/themes\/theme14\/bg.gif",
"profile_background_tile": true,
"profile_image_url": "http:\/\/pbs.twimg.com\/profile_images\/378800000337984382\/5eded5a0c6fda4a85511aff15e5befd9_normal.jpeg",
"profile_image_url_https": "https:\/\/pbs.twimg.com\/profile_images\/378800000337984382\/5eded5a0c6fda4a85511aff15e5befd9_normal.jpeg",
"profile_banner_url": "https:\/\/pbs.twimg.com\/profile_banners\/1690262984\/1429709252",
"profile_link_color": "009999",
"profile_sidebar_border_color": "EEEEEE",
"profile_sidebar_fill_color": "EFEFEF",
"profile_text_color": "333333",
"profile_use_background_image": true,
"has_extended_profile": false,
"default_profile": false,
"default_profile_image": false,
"following": true,
"follow_request_sent": false,
"notifications": false
},{
"id": 694476444991229955,
"id_str": "694476444991229955",
"text": "Got it",
"sender": {
"id": 1690262984,
"id_str": "1690262984",
"name": "Ashok Kumar T",
"screen_name": "Ashok_kumar_T",
"location": "Trivandrum",
"description": "",
"url": null
},
"protected": false,
"followers_count": 68,
"friends_count": 32,
"listed_count": 0,
"created_at": "Thu Aug 22 06:52:53 +0000 2013",
"favourites_count": 5,
"utc_offset": 19800,
"time_zone": "Chennai",
"geo_enabled": true,
"verified": false,
"statuses_count": 124,
"lang": "en",
"contributors_enabled": false,
"is_translator": false,
"is_translation_enabled": false,
"profile_background_color": "131516",
"profile_background_image_url": "http:\/\/abs.twimg.com\/images\/themes\/theme14\/bg.gif",
"profile_background_image_url_https": "https:\/\/abs.twimg.com\/images\/themes\/theme14\/bg.gif",
"profile_background_tile": true,
"profile_image_url": "http:\/\/pbs.twimg.com\/profile_images\/378800000337984382\/5eded5a0c6fda4a85511aff15e5befd9_normal.jpeg",
"profile_image_url_https": "https:\/\/pbs.twimg.com\/profile_images\/378800000337984382\/5eded5a0c6fda4a85511aff15e5befd9_normal.jpeg",
"profile_banner_url": "https:\/\/pbs.twimg.com\/profile_banners\/1690262984\/1429709252",
"profile_link_color": "009999",
"profile_sidebar_border_color": "EEEEEE",
"profile_sidebar_fill_color": "EFEFEF",
"profile_text_color": "333333",
"profile_use_background_image": true,
"has_extended_profile": false,
"default_profile": false,
"default_profile_image": false,
"following": true,
"follow_request_sent": false,
"notifications": false
}]`
keys := make([]PublicKey,0)
err := json.Unmarshal([]byte(s), &keys)
if err == nil {
fmt.Printf("%+v\n", keys)
} else {
fmt.Println(err)
fmt.Printf("%+v\n", keys)
}
myId := keys[l].Id
fmt.Printf(myId)
}
If the goal is to print the identifier of the second array element, then use this code:
myId := keys[1].ID // l changed 1, Id changed to ID
fmt.Println(myId)
playground example
{
"filter_level": "low",
"retweeted": false,
"in_reply_to_screen_name": "paperboatdrinks",
"possibly_sensitive": false,
"truncated": false,
"lang": "und",
"in_reply_to_status_id_str": "595491441440227328",
"id": 595491657585299500,
"in_reply_to_user_id_str": "1484637054",
"timestamp_ms": "1430811249525",
"in_reply_to_status_id": 595491441440227300,
"created_at": "Tue May 05 07:34:09 +0000 2015",
"favorite_count": 0,
"place": null,
"coordinates": null,
"text": "#paperboatdrinks ������������������������������������",
"contributors": null,
"geo": null,
"entities": {
"trends": [],
"symbols": [],
"urls": [],
"hashtags": [],
"user_mentions": [
{
"id": 1484637054,
"name": "Paper Boat",
"indices": [
0,
16
],
"screen_name": "paperboatdrinks",
"id_str": "1484637054"
}
]
},
"source": "Twitter for Android",
"favorited": false,
"in_reply_to_user_id": 1484637054,
"retweet_count": 0,
"id_str": "595491657585299456",
"user": {
"location": "",
"default_profile": true,
"statuses_count": 28,
"profile_background_tile": false,
"lang": "en",
"profile_link_color": "0084B4",
"profile_banner_url": "https://pbs.twimg.com/profile_banners/3099469722/1429881891",
"id": 3099469722,
"following": null,
"favourites_count": 2,
"protected": false,
"profile_text_color": "333333",
"verified": false,
"description": "3d visualizer",
"contributors_enabled": false,
"profile_sidebar_border_color": "C0DEED",
"name": "VICKY GUSAIN",
"profile_background_color": "C0DEED",
"created_at": "Fri Mar 20 11:43:14 +0000 2015",
"default_profile_image": false,
"followers_count": 3,
"profile_image_url_https": "https://pbs.twimg.com/profile_images/591593652138344448/_JhM1YhS_normal.jpg",
"geo_enabled": true,
"profile_background_image_url": "http://abs.twimg.com/images/themes/theme1/bg.png",
"profile_background_image_url_https": "https://abs.twimg.com/images/themes/theme1/bg.png",
"follow_request_sent": null,
"url": null,
"utc_offset": null,
"time_zone": null,
"notifications": null,
"profile_use_background_image": true,
"friends_count": 9,
"profile_sidebar_fill_color": "DDEEF6",
"screen_name": "VickyHanumant",
"id_str": "3099469722",
"profile_image_url": "http://pbs.twimg.com/profile_images/591593652138344448/_JhM1YhS_normal.jpg",
"listed_count": 0,
"is_translator": false
}
}`
But I had created schema but not accept it properly.
create external table if not exists paper5 (
filter_level string,
retweeted boolean,
in_reply_to_screen_name string,
possibly_sensitive boolean,
truncated boolean,
lang string,
in_reply_to_status_id_str string,
id int,
in_reply_to_user_id_str string,
timestamp_ms string,
in_reply_to_status_id int,
created_at string,
favorite_count int,
place string,
coordinates string,
text string,
contributors string,
geo string,
entities struct,
symbols: array,
urls: array,
hashtags: array,
user_mentions: array>, screen_name: string, id_str: string>>,
sources string,
favorited boolean,
in_reply_to_user_id int,
retweet_count int,
id_str string,
user struct ) ROW FORMAT SERDE 'org.apache.hadoop.hive.contrib.serde2.JsonSerde' location '/paper5/';
It showing error at entities and user_mentions field Please let me know to create a table in hive.
This link could help you it explains how you can insert data (json file) into hbase using hive QL with json_object