Read Nested JSON Data in DStream in pyspark - json
I have written the following code to stream data from the Tweepy API, and I am receiving data in the stream object. However, I am unable to get stream["user"]["followers_count"] and don't know how to extract it. I also tried jsonLines = lines.flatMap(lambda json_str:json.loads(json_str)), but that did not help.
from __future__ import print_function
from pyspark import SparkContext
from pyspark.streaming import StreamingContext
import json
# Driver script: print every line received on a local socket, windowed over the last 30 seconds.
sc = SparkContext()
ssc = StreamingContext(sc, 10) # Batch interval of 10 sec (no slide is passed to window() below, so it presumably slides once per batch - confirm against the PySpark docs)
# Read text lines from the local socket on port 4444.
socket_stream = ssc.socketTextStream("localhost",4444)
stream = socket_stream.window(30) #window length 30 sec
# No parsing has been applied, so pprint() shows each record as a raw JSON string.
stream.pprint()
# Start the streaming computation and block until it terminates.
ssc.start()
ssc.awaitTermination()
stream.pprint() gives me the following JSON.
{"created_at":"Sat May 08 09:27:43 +0000 2021","id":1390961604079067137,"id_str":"1390961604079067137","text":"The return of the king! This style has so many parameters that have to be correct and Stone nails em all! - Drinkin\u2026 https:\/\/t.co\/uCogsfW8NC","source":"\u003ca href=\"https:\/\/untappd.com\" rel=\"nofollow\"\u003eUntappd\u003c\/a\u003e","truncated":true,"in_reply_to_status_id":null,"in_reply_to_status_id_str":null,"in_reply_to_user_id":null,"in_reply_to_user_id_str":null,"in_reply_to_screen_name":null,"user":{"id":905439001,"id_str":"905439001","name":"Nahoj Morts","screen_name":"CraftBeerJunkie","location":null,"url":null,"description":null,"translator_type":"none","protected":false,"verified":false,"followers_count":15,"friends_count":113,"listed_count":0,"favourites_count":1,"statuses_count":1868,"created_at":"Fri Oct 26 06:36:01 +0000 2012","utc_offset":null,"time_zone":null,"geo_enabled":false,"lang":null,"contributors_enabled":false,"is_translator":false,"profile_background_color":"C0DEED","profile_background_image_url":"http:\/\/abs.twimg.com\/images\/themes\/theme1\/bg.png","profile_background_image_url_https":"https:\/\/abs.twimg.com\/images\/themes\/theme1\/bg.png","profile_background_tile":false,"profile_link_color":"1DA1F2","profile_sidebar_border_color":"C0DEED","profile_sidebar_fill_color":"DDEEF6","profile_text_color":"333333","profile_use_background_image":true,"profile_image_url":"http:\/\/pbs.twimg.com\/profile_images\/2765713047\/b92d58c569ad4739e67ef4d4e9a35780_normal.jpeg","profile_image_url_https":"https:\/\/pbs.twimg.com\/profile_images\/2765713047\/b92d58c569ad4739e67ef4d4e9a35780_normal.jpeg","default_profile":true,"default_profile_image":false,"following":null,"follow_request_sent":null,"notifications":null,"withheld_in_countries":[]},"geo":null,"coordinates":null,"place":null,"contributors":null,"is_quote_status":false,"extended_tweet":{"full_text":"The return of the king! 
This style has so many parameters that have to be correct and Stone nails em all! - Drinking a Stone Sublimely Self-Righteous Black IPA by #StoneBrewing # Uggleberget \u2014 https:\/\/t.co\/a3XixD5teo","display_text_range":[0,217],"entities":{"hashtags":[],"urls":[{"url":"https:\/\/t.co\/a3XixD5teo","expanded_url":"https:\/\/untp.beer\/s\/c1025322433","display_url":"untp.beer\/s\/c1025322433","indices":[194,217]}],"user_mentions":[{"screen_name":"StoneBrewing","name":"Stone Brewing","id":16331259,"id_str":"16331259","indices":[163,176]}],"symbols":[]}},"quote_count":0,"reply_count":0,"retweet_count":0,"favorite_count":0,"entities":{"hashtags":[],"urls":[{"url":"https:\/\/t.co\/uCogsfW8NC","expanded_url":"https:\/\/twitter.com\/i\/web\/status\/1390961604079067137","display_url":"twitter.com\/i\/web\/status\/1\u2026","indices":[117,140]}],"user_mentions":[],"symbols":[]},"favorited":false,"retweeted":false,"possibly_sensitive":false,"filter_level":"low","lang":"en","timestamp_ms":"1620466063886"}
{"created_at":"Sat May 08 09:27:43 +0000 2021","id":1390961604334919683,"id_str":"1390961604334919683","text":"[HQ] 210508 #KrisWu #Wuyifan # GTSSC 2021 (Day 1) Cr.Fanbaobao(3) #PorscheRacerKrisWu \nhttps:\/\/t.co\/cEcAtzKNsM\u2026 https:\/\/t.co\/3B9uUS3mdu","display_text_range":[0,140],"source":"\u003ca href=\"https:\/\/mobile.twitter.com\" rel=\"nofollow\"\u003eTwitter Web App\u003c\/a\u003e","truncated":true,"in_reply_to_status_id":1390961304316387331,"in_reply_to_status_id_str":"1390961304316387331","in_reply_to_user_id":620222301,"in_reply_to_user_id_str":"620222301","in_reply_to_screen_name":"kissmemyfan_","user":{"id":620222301,"id_str":"620222301","name":"KISSMEMYFAN1106\u2661","screen_name":"kissmemyfan_","location":null,"url":"https:\/\/www.youtube.com\/channel\/UCtYrDyYwqZyQfYtCz79zPHQ\/featured","description":"for #KrisWu Kris Wu Yifan since 120627 \u2740notify in case of emergency,pls dm\u2740 fb: https:\/\/www.facebook.com\/groups\/1051879991503131\/","translator_type":"regular","protected":false,"verified":false,"followers_count":91307,"friends_count":24,"listed_count":433,"favourites_count":14194,"statuses_count":95476,"created_at":"Wed Jun 27 16:34:02 +0000 
2012","utc_offset":null,"time_zone":null,"geo_enabled":true,"lang":null,"contributors_enabled":false,"is_translator":false,"profile_background_color":"FFCC4D","profile_background_image_url":"http:\/\/abs.twimg.com\/images\/themes\/theme1\/bg.png","profile_background_image_url_https":"https:\/\/abs.twimg.com\/images\/themes\/theme1\/bg.png","profile_background_tile":true,"profile_link_color":"E81C4F","profile_sidebar_border_color":"FFFFFF","profile_sidebar_fill_color":"DDEEF6","profile_text_color":"333333","profile_use_background_image":false,"profile_image_url":"http:\/\/pbs.twimg.com\/profile_images\/1252957871802707974\/RjNGNnly_normal.jpg","profile_image_url_https":"https:\/\/pbs.twimg.com\/profile_images\/1252957871802707974\/RjNGNnly_normal.jpg","profile_banner_url":"https:\/\/pbs.twimg.com\/profile_banners\/620222301\/1587563426","default_profile":false,"default_profile_image":false,"following":null,"follow_request_sent":null,"notifications":null,"withheld_in_countries":[]},"geo":null,"coordinates":null,"place":null,"contributors":null,"is_quote_status":false,"extended_tweet":{"full_text":"[HQ] 210508 #KrisWu #Wuyifan # GTSSC 2021 (Day 1) Cr.Fanbaobao(3) #PorscheRacerKrisWu \nhttps:\/\/t.co\/cEcAtzKNsM\nhttps:\/\/t.co\/PNbPqE88Y3 
https:\/\/t.co\/8WPhfpHiyO","display_text_range":[0,134],"entities":{"hashtags":[{"text":"KrisWu","indices":[12,19]},{"text":"Wuyifan","indices":[20,28]},{"text":"PorscheRacerKrisWu","indices":[66,85]}],"urls":[{"url":"https:\/\/t.co\/cEcAtzKNsM","expanded_url":"https:\/\/wx1.sinaimg.cn\/large\/00688bwvly1gqb3c1cdbuj31l82bpe81.jpg","display_url":"wx1.sinaimg.cn\/large\/00688bwv\u2026","indices":[87,110]},{"url":"https:\/\/t.co\/PNbPqE88Y3","expanded_url":"https:\/\/wx4.sinaimg.cn\/large\/00688bwvly1gqb3c1mc9hj31ay1wb7ji.jpg","display_url":"wx4.sinaimg.cn\/large\/00688bwv\u2026","indices":[111,134]}],"user_mentions":[],"symbols":[],"media":[{"id":1390961423400857602,"id_str":"1390961423400857602","indices":[135,158],"media_url":"http:\/\/pbs.twimg.com\/media\/E02wC4IUcAI4F-O.jpg","media_url_https":"https:\/\/pbs.twimg.com\/media\/E02wC4IUcAI4F-O.jpg","url":"https:\/\/t.co\/8WPhfpHiyO","display_url":"pic.twitter.com\/8WPhfpHiyO","expanded_url":"https:\/\/twitter.com\/kissmemyfan_\/status\/1390961604334919683\/photo\/1","type":"photo","sizes":{"small":{"w":465,"h":680,"resize":"fit"},"thumb":{"w":150,"h":150,"resize":"crop"},"large":{"w":1024,"h":1498,"resize":"fit"},"medium":{"w":820,"h":1200,"resize":"fit"}}},{"id":1390961423417704450,"id_str":"1390961423417704450","indices":[135,158],"media_url":"http:\/\/pbs.twimg.com\/media\/E02wC4MVgAI8ZHZ.jpg","media_url_https":"https:\/\/pbs.twimg.com\/media\/E02wC4MVgAI8ZHZ.jpg","url":"https:\/\/t.co\/8WPhfpHiyO","display_url":"pic.twitter.com\/8WPhfpHiyO","expanded_url":"https:\/\/twitter.com\/kissmemyfan_\/status\/1390961604334919683\/photo\/1","type":"photo","sizes":{"thumb":{"w":150,"h":150,"resize":"crop"},"medium":{"w":825,"h":1200,"resize":"fit"},"small":{"w":467,"h":680,"resize":"fit"},"large":{"w":1024,"h":1490,"resize":"fit"}}}]},"extended_entities":{"media":[{"id":1390961423400857602,"id_str":"1390961423400857602","indices":[135,158],"media_url":"http:\/\/pbs.twimg.com\/media\/E02wC4IUcAI4F-O.jpg","media_url_https":
"https:\/\/pbs.twimg.com\/media\/E02wC4IUcAI4F-O.jpg","url":"https:\/\/t.co\/8WPhfpHiyO","display_url":"pic.twitter.com\/8WPhfpHiyO","expanded_url":"https:\/\/twitter.com\/kissmemyfan_\/status\/1390961604334919683\/photo\/1","type":"photo","sizes":{"small":{"w":465,"h":680,"resize":"fit"},"thumb":{"w":150,"h":150,"resize":"crop"},"large":{"w":1024,"h":1498,"resize":"fit"},"medium":{"w":820,"h":1200,"resize":"fit"}}},{"id":1390961423417704450,"id_str":"1390961423417704450","indices":[135,158],"media_url":"http:\/\/pbs.twimg.com\/media\/E02wC4MVgAI8ZHZ.jpg","media_url_https":"https:\/\/pbs.twimg.com\/media\/E02wC4MVgAI8ZHZ.jpg","url":"https:\/\/t.co\/8WPhfpHiyO","display_url":"pic.twitter.com\/8WPhfpHiyO","expanded_url":"https:\/\/twitter.com\/kissmemyfan_\/status\/1390961604334919683\/photo\/1","type":"photo","sizes":{"thumb":{"w":150,"h":150,"resize":"crop"},"medium":{"w":825,"h":1200,"resize":"fit"},"small":{"w":467,"h":680,"resize":"fit"},"large":{"w":1024,"h":1490,"resize":"fit"}}}]}},"quote_count":0,"reply_count":0,"retweet_count":0,"favorite_count":0,"entities":{"hashtags":[{"text":"KrisWu","indices":[12,19]},{"text":"Wuyifan","indices":[20,28]},{"text":"PorscheRacerKrisWu","indices":[66,85]}],"urls":[{"url":"https:\/\/t.co\/cEcAtzKNsM","expanded_url":"https:\/\/wx1.sinaimg.cn\/large\/00688bwvly1gqb3c1cdbuj31l82bpe81.jpg","display_url":"wx1.sinaimg.cn\/large\/00688bwv\u2026","indices":[87,110]},{"url":"https:\/\/t.co\/3B9uUS3mdu","expanded_url":"https:\/\/twitter.com\/i\/web\/status\/1390961604334919683","display_url":"twitter.com\/i\/web\/status\/1\u2026","indices":[112,135]}],"user_mentions":[],"symbols":[]},"favorited":false,"retweeted":false,"possibly_sensitive":false,"filter_level":"low","lang":"ht","timestamp_ms":"1620466063947"}
{"created_at":"Sat May 08 09:27:44 +0000 2021","id":1390961607572865031,"id_str":"1390961607572865031","text":"RT #aa86marat2: #\u0644\u0627_\u0644\u0644\u062a\u0637\u0639\u064a\u0645_\u0627\u0644\u0627\u062c\u0628\u0627\u0631\u064a\n\u0628\u0633\u0645 \u0627\u0644\u0644\u0647 \u062a\u0648\u0643\u0644\u062a \u0639\u0644\u0649 \u0627\u0644\u0644\u0647 \n\u0648\u0627\u0644\u0644\u0647 \u0643\u0631\u064a\u0645 \u064a\u0627\u0631\u0628 \u062a\u062a\u0642\u0641\u0644 \u0627\u0644\u064a\u0648\u0645 \u0628\u0648\u062c\u0648\u062f\u0627\u0644\u062e\u064a\u0631\u064a\u0646 \u0627\u0645\u062b\u0627\u0644\u0643\u0645\n \u064a\u0627\u0631\u0628 \u0633\u062e\u0631\u0644\u0647 \u0645\u0646 \u064a\u0642\u0641\u0644\u0647\u0627\n\u0633\u062c\u064a\u0646\u2026","source":"\u003ca href=\"http:\/\/twitter.com\/download\/iphone\" rel=\"nofollow\"\u003eTwitter for iPhone\u003c\/a\u003e","truncated":false,"in_reply_to_status_id":null,"in_reply_to_status_id_str":null,"in_reply_to_user_id":null,"in_reply_to_user_id_str":null,"in_reply_to_screen_name":null,"user":{"id":1250758860660527105,"id_str":"1250758860660527105","name":"om_mhamd99","screen_name":"Mhamd99Om","location":null,"url":null,"description":"\u064a\u0627\u0631\u0628 \u2728\ud83e\udd32\ud83c\udffb\u0628\u0627\u0631\u0643 \u0644\u064a \u0641\u064a\u0645\u0627 \u0623\u0639\u0637\u062a\u0646\u064a \u0648\u0641\u0631\u062c \u0647\u0645\u064a \u0628\u0642\u0636\u0627\u0621 \u062f\u064a\u0646 \u0648\u0627\u0639\u062a\u064a \u0639\u0644\u064a \u0630\u0644\u0643 \u0641\u0627\u062a\u0648\u0631\u0647 \u062a\u0646\u0641\u064a\u0630 1935092757","translator_type":"none","protected":false,"verified":false,"followers_count":903,"friends_count":389,"listed_count":0,"favourites_count":2174,"statuses_count":33294,"created_at":"Thu Apr 16 12:12:15 +0000 
2020","utc_offset":null,"time_zone":null,"geo_enabled":false,"lang":null,"contributors_enabled":false,"is_translator":false,"profile_background_color":"F5F8FA","profile_background_image_url":"","profile_background_image_url_https":"","profile_background_tile":false,"profile_link_color":"1DA1F2","profile_sidebar_border_color":"C0DEED","profile_sidebar_fill_color":"DDEEF6","profile_text_color":"333333","profile_use_background_image":true,"profile_image_url":"http:\/\/pbs.twimg.com\/profile_images\/1286520143431061504\/k2Jg3W0u_normal.jpg","profile_image_url_https":"https:\/\/pbs.twimg.com\/profile_images\/1286520143431061504\/k2Jg3W0u_normal.jpg","default_profile":true,"default_profile_image":false,"following":null,"follow_request_sent":null,"notifications":null,"withheld_in_countries":[]},"geo":null,"coordinates":null,"place":null,"contributors":null,"retweeted_status":{"created_at":"Sat May 08 02:27:00 +0000 2021","id":1390855727116427270,"id_str":"1390855727116427270","text":"#\u0644\u0627_\u0644\u0644\u062a\u0637\u0639\u064a\u0645_\u0627\u0644\u0627\u062c\u0628\u0627\u0631\u064a\n\u0628\u0633\u0645 \u0627\u0644\u0644\u0647 \u062a\u0648\u0643\u0644\u062a \u0639\u0644\u0649 \u0627\u0644\u0644\u0647 \n\u0648\u0627\u0644\u0644\u0647 \u0643\u0631\u064a\u0645 \u064a\u0627\u0631\u0628 \u062a\u062a\u0642\u0641\u0644 \u0627\u0644\u064a\u0648\u0645 \u0628\u0648\u062c\u0648\u062f\u0627\u0644\u062e\u064a\u0631\u064a\u0646 \u0627\u0645\u062b\u0627\u0644\u0643\u0645\n \u064a\u0627\u0631\u0628 \u0633\u062e\u0631\u0644\u0647 \u0645\u0646 \u064a\u0642\u0641\u0644\u2026 https:\/\/t.co\/mpCKrtjMW4","display_text_range":[0,140],"source":"\u003ca href=\"http:\/\/twitter.com\/download\/android\" rel=\"nofollow\"\u003eTwitter for 
Android\u003c\/a\u003e","truncated":true,"in_reply_to_status_id":null,"in_reply_to_status_id_str":null,"in_reply_to_user_id":null,"in_reply_to_user_id_str":null,"in_reply_to_screen_name":null,"user":{"id":1338279194892115974,"id_str":"1338279194892115974","name":"\u0639\u0628\u062f\u0627\u0644\u0631\u062d\u0645\u0646 \u0645\u062d\u0645\u062f 2","screen_name":"aa86marat2","location":null,"url":null,"description":null,"translator_type":"none","protected":false,"verified":false,"followers_count":382,"friends_count":82,"listed_count":1,"favourites_count":43,"statuses_count":16494,"created_at":"Mon Dec 14 00:26:54 +0000 2020","utc_offset":null,"time_zone":null,"geo_enabled":false,"lang":null,"contributors_enabled":false,"is_translator":false,"profile_background_color":"F5F8FA","profile_background_image_url":"","profile_background_image_url_https":"","profile_background_tile":false,"profile_link_color":"1DA1F2","profile_sidebar_border_color":"C0DEED","profile_sidebar_fill_color":"DDEEF6","profile_text_color":"333333","profile_use_background_image":true,"profile_image_url":"http:\/\/pbs.twimg.com\/profile_images\/1356322484673519616\/7NSfbGye_normal.jpg","profile_image_url_https":"https:\/\/pbs.twimg.com\/profile_images\/1356322484673519616\/7NSfbGye_normal.jpg","default_profile":true,"default_profile_image":false,"following":null,"follow_request_sent":null,"notifications":null,"withheld_in_countries":[]},"geo":null,"coordinates":null,"place":null,"contributors":null,"is_quote_status":false,"extended_tweet":{"full_text":"#\u0644\u0627_\u0644\u0644\u062a\u0637\u0639\u064a\u0645_\u0627\u0644\u0627\u062c\u0628\u0627\u0631\u064a\n\u0628\u0633\u0645 \u0627\u0644\u0644\u0647 \u062a\u0648\u0643\u0644\u062a \u0639\u0644\u0649 \u0627\u0644\u0644\u0647 \n\u0648\u0627\u0644\u0644\u0647 \u0643\u0631\u064a\u0645 \u064a\u0627\u0631\u0628 \u062a\u062a\u0642\u0641\u0644 \u0627\u0644\u064a\u0648\u0645 \u0628\u0648\u062c\u0648\u062f\u0627\u0644\u062e\u064a\u0631\u064a\u0646 
\u0627\u0645\u062b\u0627\u0644\u0643\u0645\n \u064a\u0627\u0631\u0628 \u0633\u062e\u0631\u0644\u0647 \u0645\u0646 \u064a\u0642\u0641\u0644\u0647\u0627\n\u0633\u062c\u064a\u0646 \u0639\u0645\u0631\u064756 \u0639\u0627\u0645\u0627 \u0645\u062a\u0632\u0648\u062c \u0644\u062f\u064a\u0647 \u0637\u0641\u0644 \u0645\u0633\u062c\u0648\u0646 \u0645\u0646\u0630\u0639\u0627\u0645\u064a\u0646 \u06484\u0623\u0634\u0647\u0631\n \u0645\u062a\u0628\u0642\u0649 \u0639\u0644\u064a\u0647 296200 \u0631\u064a\u0627\u0644\n\u0627\u0644\u0641\u0627\u062a\u0648\u0631\u0629\n1934638011\n\u0639\u0628\u0631 #\n\nhttps:\/\/t.co\/JjOHPoGLrJ\n\n\u0627\u0644\u062c\u0631\u0623\u0629_\u0641\u064a_\u0627\u0644\u0637\u0628\u0639\n\u0645\u0635\u0631\u0641_\u0627\u0644\u0631\u0627\u062c\u062d\u064a https:\/\/t.co\/mGuOo9qwZP","display_text_range":[0,276],"entities":{"hashtags":[{"text":"\u0644\u0627_\u0644\u0644\u062a\u0637\u0639\u064a\u0645_\u0627\u0644\u0627\u062c\u0628\u0627\u0631\u064a","indices":[0,20]}],"urls":[{"url":"https:\/\/t.co\/JjOHPoGLrJ","expanded_url":"https:\/\/Ehsan.sa\/referral\/29734F898E5CCB199DD92FD5CE8284C3B10081046BB13791AC72608BDD62B3D84020AC2B515E485C2E4B718A425C1A710438C7B814161E4197F6E7F4F73557AA","display_url":"Ehsan.sa\/referral\/29734\u2026","indices":[223,246]}],"user_mentions":[],"symbols":[],"media":[{"id":1390855708363698183,"id_str":"1390855708363698183","indices":[277,300],"media_url":"http:\/\/pbs.twimg.com\/media\/E01P5c7XsAcgFlQ.jpg","media_url_https":"https:\/\/pbs.twimg.com\/media\/E01P5c7XsAcgFlQ.jpg","url":"https:\/\/t.co\/mGuOo9qwZP","display_url":"pic.twitter.com\/mGuOo9qwZP","expanded_url":"https:\/\/twitter.com\/aa86marat2\/status\/1390855727116427270\/photo\/1","type":"photo","sizes":{"thumb":{"w":150,"h":150,"resize":"crop"},"medium":{"w":554,"h":1200,"resize":"fit"},"large":{"w":720,"h":1560,"resize":"fit"},"small":{"w":314,"h":680,"resize":"fit"}}},{"id":1390855718069317649,"id_str":"1390855718069317649","indices":[277,300],"media_url":"http:\/\/pbs.t
wimg.com\/media\/E01P6BFXsBEhWDZ.jpg","media_url_https":"https:\/\/pbs.twimg.com\/media\/E01P6BFXsBEhWDZ.jpg","url":"https:\/\/t.co\/mGuOo9qwZP","display_url":"pic.twitter.com\/mGuOo9qwZP","expanded_url":"https:\/\/twitter.com\/aa86marat2\/status\/1390855727116427270\/photo\/1","type":"photo","sizes":{"thumb":{"w":150,"h":150,"resize":"crop"},"small":{"w":511,"h":680,"resize":"fit"},"large":{"w":901,"h":1200,"resize":"fit"},"medium":{"w":901,"h":1200,"resize":"fit"}}}]},"extended_entities":{"media":[{"id":1390855708363698183,"id_str":"1390855708363698183","indices":[277,300],"media_url":"http:\/\/pbs.twimg.com\/media\/E01P5c7XsAcgFlQ.jpg","media_url_https":"https:\/\/pbs.twimg.com\/media\/E01P5c7XsAcgFlQ.jpg","url":"https:\/\/t.co\/mGuOo9qwZP","display_url":"pic.twitter.com\/mGuOo9qwZP","expanded_url":"https:\/\/twitter.com\/aa86marat2\/status\/1390855727116427270\/photo\/1","type":"photo","sizes":{"thumb":{"w":150,"h":150,"resize":"crop"},"medium":{"w":554,"h":1200,"resize":"fit"},"large":{"w":720,"h":1560,"resize":"fit"},"small":{"w":314,"h":680,"resize":"fit"}}},{"id":1390855718069317649,"id_str":"1390855718069317649","indices":[277,300],"media_url":"http:\/\/pbs.twimg.com\/media\/E01P6BFXsBEhWDZ.jpg","media_url_https":"https:\/\/pbs.twimg.com\/media\/E01P6BFXsBEhWDZ.jpg","url":"https:\/\/t.co\/mGuOo9qwZP","display_url":"pic.twitter.com\/mGuOo9qwZP","expanded_url":"https:\/\/twitter.com\/aa86marat2\/status\/1390855727116427270\/photo\/1","type":"photo","sizes":{"thumb":{"w":150,"h":150,"resize":"crop"},"small":{"w":511,"h":680,"resize":"fit"},"large":{"w":901,"h":1200,"resize":"fit"},"medium":{"w":901,"h":1200,"resize":"fit"}}}]}},"quote_count":0,"reply_count":1,"retweet_count":33,"favorite_count":2,"entities":{"hashtags":[{"text":"\u0644\u0627_\u0644\u0644\u062a\u0637\u0639\u064a\u0645_\u0627\u0644\u0627\u062c\u0628\u0627\u0631\u064a","indices":[0,20]}],"urls":[{"url":"https:\/\/t.co\/mpCKrtjMW4","expanded_url":"https:\/\/twitter.com\/i\/web\/status\/13908557271
16427270","display_url":"twitter.com\/i\/web\/status\/1\u2026","indices":[117,140]}],"user_mentions":[],"symbols":[]},"favorited":false,"retweeted":false,"possibly_sensitive":false,"filter_level":"low","lang":"ar"},"is_quote_status":false,"quote_count":0,"reply_count":0,"retweet_count":0,"favorite_count":0,"entities":{"hashtags":[{"text":"\u0644\u0627_\u0644\u0644\u062a\u0637\u0639\u064a\u0645_\u0627\u0644\u0627\u062c\u0628\u0627\u0631\u064a","indices":[16,36]}],"urls":[],"user_mentions":[{"screen_name":"aa86marat2","name":"\u0639\u0628\u062f\u0627\u0644\u0631\u062d\u0645\u0646 \u0645\u062d\u0645\u062f 2","id":1338279194892115974,"id_str":"1338279194892115974","indices":[3,14]}],"symbols":[]},"favorited":false,"retweeted":false,"filter_level":"low","lang":"ar","timestamp_ms":"1620466064719"}
{"created_at":"Sat May 08 09:27:44 +0000 2021","id":1390961607799296000,"id_str":"1390961607799296000","text":"RT #ariftgif: Tag ur bestie \/ fav person \/ # third person https:\/\/t.co\/L4pGn5nAKJ","source":"\u003ca href=\"http:\/\/twitter.com\/download\/iphone\" rel=\"nofollow\"\u003eTwitter for iPhone\u003c\/a\u003e","truncated":false,"in_reply_to_status_id":null,"in_reply_to_status_id_str":null,"in_reply_to_user_id":null,"in_reply_to_user_id_str":null,"in_reply_to_screen_name":null,"user":{"id":812488479607169024,"id_str":"812488479607169024","name":"\ud83c\udfae","screen_name":"bjeyyy_","location":null,"url":null,"description":"always 20 hihihi","translator_type":"none","protected":false,"verified":false,"followers_count":100,"friends_count":108,"listed_count":0,"favourites_count":2519,"statuses_count":48348,"created_at":"Sat Dec 24 02:42:03 +0000 2016","utc_offset":null,"time_zone":null,"geo_enabled":true,"lang":null,"contributors_enabled":false,"is_translator":false,"profile_background_color":"F5F8FA","profile_background_image_url":"","profile_background_image_url_https":"","profile_background_tile":false,"profile_link_color":"1DA1F2","profile_sidebar_border_color":"C0DEED","profile_sidebar_fill_color":"DDEEF6","profile_text_color":"333333","profile_use_background_image":true,"profile_image_url":"http:\/\/pbs.twimg.com\/profile_images\/1381271588394336256\/CnmfqTCN_normal.jpg","profile_image_url_https":"https:\/\/pbs.twimg.com\/profile_images\/1381271588394336256\/CnmfqTCN_normal.jpg","profile_banner_url":"https:\/\/pbs.twimg.com\/profile_banners\/812488479607169024\/1582382619","default_profile":true,"default_profile_image":false,"following":null,"follow_request_sent":null,"notifications":null,"withheld_in_countries":[]},"geo":null,"coordinates":null,"place":null,"contributors":null,"retweeted_status":{"created_at":"Fri May 07 10:38:58 +0000 2021","id":1390617147198480388,"id_str":"1390617147198480388","text":"Tag ur bestie \/ fav person \/ # third person 
https:\/\/t.co\/L4pGn5nAKJ","display_text_range":[0,43],"source":"\u003ca href=\"http:\/\/twitter.com\/download\/android\" rel=\"nofollow\"\u003eTwitter for Android\u003c\/a\u003e","truncated":false,"in_reply_to_status_id":null,"in_reply_to_status_id_str":null,"in_reply_to_user_id":null,"in_reply_to_user_id_str":null,"in_reply_to_screen_name":null,"user":{"id":1188902968277385216,"id_str":"1188902968277385216","name":"arif | \u2606","screen_name":"ariftgif","location":"\ud835\udcb6\ud835\udcc3\ud835\udcbf\ud835\udcb6\ud835\udcc8\ud835\udcc2\ud835\udcb6\ud835\udcc7\ud835\udcb6","url":"https:\/\/vt.tiktok.com\/ZSEAw2WN\/","description":"ugly asf | a man with mullet hair","translator_type":"none","protected":false,"verified":false,"followers_count":2144,"friends_count":937,"listed_count":0,"favourites_count":20934,"statuses_count":19470,"created_at":"Mon Oct 28 19:39:09 +0000 2019","utc_offset":null,"time_zone":null,"geo_enabled":false,"lang":null,"contributors_enabled":false,"is_translator":false,"profile_background_color":"F5F8FA","profile_background_image_url":"","profile_background_image_url_https":"","profile_background_tile":false,"profile_link_color":"1DA1F2","profile_sidebar_border_color":"C0DEED","profile_sidebar_fill_color":"DDEEF6","profile_text_color":"333333","profile_use_background_image":true,"profile_image_url":"http:\/\/pbs.twimg.com\/profile_images\/1387751035041505282\/f_Q5rJ_B_normal.jpg","profile_image_url_https":"https:\/\/pbs.twimg.com\/profile_images\/1387751035041505282\/f_Q5rJ_B_normal.jpg","profile_banner_url":"https:\/\/pbs.twimg.com\/profile_banners\/1188902968277385216\/1619601431","default_profile":true,"default_profile_image":false,"following":null,"follow_request_sent":null,"notifications":null,"withheld_in_countries":[]},"geo":null,"coordinates":null,"place":null,"contributors":null,"is_quote_status":false,"quote_count":49,"reply_count":176,"retweet_count":1451,"favorite_count":3773,"entities":{"hashtags":[],"urls":[],"user_mentions":[
],"symbols":[],"media":[{"id":1390617137362792450,"id_str":"1390617137362792450","indices":[44,67],"media_url":"http:\/\/pbs.twimg.com\/media\/E0x26ysUcAIYLJd.jpg","media_url_https":"https:\/\/pbs.twimg.com\/media\/E0x26ysUcAIYLJd.jpg","url":"https:\/\/t.co\/L4pGn5nAKJ","display_url":"pic.twitter.com\/L4pGn5nAKJ","expanded_url":"https:\/\/twitter.com\/ariftgif\/status\/1390617147198480388\/photo\/1","type":"photo","sizes":{"thumb":{"w":150,"h":150,"resize":"crop"},"small":{"w":680,"h":667,"resize":"fit"},"medium":{"w":1080,"h":1060,"resize":"fit"},"large":{"w":1080,"h":1060,"resize":"fit"}}}]},"extended_entities":{"media":[{"id":1390617137362792450,"id_str":"1390617137362792450","indices":[44,67],"media_url":"http:\/\/pbs.twimg.com\/media\/E0x26ysUcAIYLJd.jpg","media_url_https":"https:\/\/pbs.twimg.com\/media\/E0x26ysUcAIYLJd.jpg","url":"https:\/\/t.co\/L4pGn5nAKJ","display_url":"pic.twitter.com\/L4pGn5nAKJ","expanded_url":"https:\/\/twitter.com\/ariftgif\/status\/1390617147198480388\/photo\/1","type":"photo","sizes":{"thumb":{"w":150,"h":150,"resize":"crop"},"small":{"w":680,"h":667,"resize":"fit"},"medium":{"w":1080,"h":1060,"resize":"fit"},"large":{"w":1080,"h":1060,"resize":"fit"}}}]},"favorited":false,"retweeted":false,"possibly_sensitive":false,"filter_level":"low","lang":"en"},"is_quote_status":false,"quote_count":0,"reply_count":0,"retweet_count":0,"favorite_count":0,"entities":{"hashtags":[],"urls":[],"user_mentions":[{"screen_name":"ariftgif","name":"arif | 
\u2606","id":1188902968277385216,"id_str":"1188902968277385216","indices":[3,12]}],"symbols":[],"media":[{"id":1390617137362792450,"id_str":"1390617137362792450","indices":[58,81],"media_url":"http:\/\/pbs.twimg.com\/media\/E0x26ysUcAIYLJd.jpg","media_url_https":"https:\/\/pbs.twimg.com\/media\/E0x26ysUcAIYLJd.jpg","url":"https:\/\/t.co\/L4pGn5nAKJ","display_url":"pic.twitter.com\/L4pGn5nAKJ","expanded_url":"https:\/\/twitter.com\/ariftgif\/status\/1390617147198480388\/photo\/1","type":"photo","sizes":{"thumb":{"w":150,"h":150,"resize":"crop"},"small":{"w":680,"h":667,"resize":"fit"},"medium":{"w":1080,"h":1060,"resize":"fit"},"large":{"w":1080,"h":1060,"resize":"fit"}},"source_status_id":1390617147198480388,"source_status_id_str":"1390617147198480388","source_user_id":1188902968277385216,"source_user_id_str":"1188902968277385216"}]},"extended_entities":{"media":[{"id":1390617137362792450,"id_str":"1390617137362792450","indices":[58,81],"media_url":"http:\/\/pbs.twimg.com\/media\/E0x26ysUcAIYLJd.jpg","media_url_https":"https:\/\/pbs.twimg.com\/media\/E0x26ysUcAIYLJd.jpg","url":"https:\/\/t.co\/L4pGn5nAKJ","display_url":"pic.twitter.com\/L4pGn5nAKJ","expanded_url":"https:\/\/twitter.com\/ariftgif\/status\/1390617147198480388\/photo\/1","type":"photo","sizes":{"thumb":{"w":150,"h":150,"resize":"crop"},"small":{"w":680,"h":667,"resize":"fit"},"medium":{"w":1080,"h":1060,"resize":"fit"},"large":{"w":1080,"h":1060,"resize":"fit"}},"source_status_id":1390617147198480388,"source_status_id_str":"1390617147198480388","source_user_id":1188902968277385216,"source_user_id_str":"1188902968277385216"}]},"favorited":false,"retweeted":false,"possibly_sensitive":false,"filter_level":"low","lang":"en","timestamp_ms":"1620466064773"}
I am new to Spark and have already spent 3 days trying to find out how to get a value from nested JSON inside a DStream object.
You can map the json string to a tuple of the values that you want to extract:
import json
def parse_json_string(str):
    """Extract the author's name and follower count from one tweet.

    Args:
        str: A single tweet serialized as a JSON object string that
            contains a nested "user" object. (The parameter name shadows
            the builtin; it is kept so existing callers keep working.)

    Returns:
        A ``(name, followers_count)`` tuple read from the "user" object.

    Raises:
        ValueError: If the input is not valid JSON.
        KeyError: If "user", "name" or "followers_count" is missing.
    """
    # Only the nested "user" object is needed; the rest of the tweet is ignored.
    user = json.loads(str)["user"]
    return (user["name"], user["followers_count"])
# Window the socket stream (the window arguments are elided in this snippet).
stream = socket_stream.window...
# Replace each raw JSON line with the tuple returned by parse_json_string.
stream = stream.map(lambda str: parse_json_string(str))
# Print the extracted tuples for each batch.
stream.pprint()
# Start the streaming computation and block until it terminates.
ssc.start()
ssc.awaitTermination()
Output:
-------------------------------------------
Time: 2021-05-08 18:50:48
-------------------------------------------
('Nahoj Morts', 15)
('KISSMEMYFAN1106♡', 91307)
('om_mhamd99', 903)
('🎮', 100)
Related
How to convert multiple json objects interpreted as string into Json dictionary
I have my data as below which looks like multiple json dictionaries but it is of type string. Can someone please help me out to convert it into json dictionary ? {"id": "1305857561179152385", "tweet": "If you like vintage coke machines and guys who look like Fred Flintstone you'll love the short we've riffed: Coke R\u2026 ", "ts": "Tue Sep 15 13:14:38 +0000 2020"}{"id": "1305858267067883521", "tweet": "Chinese unicorn Genki Forest plots own beverage hits #China #Chinese #Brands #GoingGlobal\u2026 ", "ts": "Tue Sep 15 13:17:27 +0000 2020"}{"id": "1305858731293507585", "tweet": "RT #CinemaCheezy: If you like vintage coke machines and guys who look like Fred Flintstone you'll love the short we've riffed: Coke Refresh\u2026", "ts": "Tue Sep 15 13:19:17 +0000 2020"}
Try this, let = "{'a': 'b', 'c': 'd'}{'e':'f', 'g':'h'}" let_list = let.split('}') d = [] for i in let_list[:-1]: val = eval(i + '}') d.append(val) The output will be two dictionaries print(d) # Will print as shown [{'a': 'b', 'c': 'd'}, {'e':'f', 'g':'h'}]
import json json.loads(json_str)
Change date format in my HTML / CSS output
I have dates saved in my SQLite database in this format 2019-01-24 13:41:40.515955 and when I output them to my web page they are displayed as 2019-01-24 13:41:40 UTC. Please can I get guidance on displaying something like Wed 24 January 2019? Not sure how to approach it.
Using JavaScript would be a simple approach to parse and format dates into the desired output. var date = new Date('6/29/2011 4:52:48 PM UTC'); date.toString() // "Wed Jun 29 2011 09:52:48 GMT-0700 (PDT)" var date = new Date('6/29/2011 4:52:48 PM UTC'); date.toString() // "Wed Jun 29 2011 09:52:48 GMT-0700 (PDT)" console.log(`Year: ${date.getFullYear()}, Month: ${date.getMonth()}, Day, ${date.getDay()}`) You can also parse dates using Date.parse(``);
Convert a row-List Cassandra table to a JSON format using scala
I want to convert a Cassandra table into JSON format using scala; this is the code I use to connect to Cassandra and show the table: val cluster = Cluster.builder().addContactPoint("localhost").build() val session = cluster.connect("MyKeySpace") try { val a = session.execute("Select* from users") println(a.all()) //Show as a Row-List //Sample --> [Row[10, Fri Jan 19 04:05:01 MST 2018, 9217], Row[10, Mon Feb 19 04:05:01 MST 2018, 9217], Row[10, Mon Mar 19 04:05:01 MDT 2018, 9217]] /** I have this example for the convertion but do not supports that format **/ case class Sample (Registro: Int,Fecha: String,Valor: String ) val agregado = Sample(999,"Wed May 20 15:19:21 MDT 31063531","982556517") val json= ("Reg_Num:"->agregado.Registro)~("TimeStamp:"->agregado.Fecha) ~ ("Value:"->agregado.Valor) //This is a List val JsonExam = println(compact(render(json))) println ( pretty(render(json)) ) } catch { case e: Exception => println(s"msg=${e.getMessage}") } Basically, I want to convert from this format: [Row[10, Fri Jan 19 04:05:01 MST 2018, 9217], Row[12, Mon Feb 20 04:05:01 MST 2018, 9216], Row[18, Tue Mar 21 04:05:01 MDT 2018, 9215]] To this: { "Reg_Num:" : 10, "TimeStamp:" : "Fri Jan 19 04:05:01 MST 2018", "Value:" : "9217" }, { "Reg_Num:" : 12, "TimeStamp:" : "Mon Feb 20 04:05:01 MST 2018", "Value:" : "9216" }, { "Reg_Num:" : 18, "TimeStamp:" : "Tue Mar 21 04:05:01 MDT 2018", "Value:" : "9215" }
It will depend on which Json library you are using. In Play Json, we create "Writes" methods which take an instance of a case class and convert it to Json. When these are implicit then the compiler will do it "automatically" when needed. For example: .... import play.api.libs.json._ case class Sample(Registro: Int, Fecha: String, Valor: String ){ object Sample { implicit val SamplenWrites = new Writes[Sample] { def writes(sample: Sample):JsValue = Json.obj( "reg_rum"-> sample.Registro, "timeStamp"-> sample.Fecha, "value" -> sample.Valor) } } } Json.obj("samples" -> Sample(5, "Fri Jan 19", "9200"))
Splitting strings by words in Scala Spark
What I'm trying to do here is to leave texts only from each tweet. import org.apache.spark.{SparkConf, SparkContext} import scala.io.Source object shortTwitter { def main(args: Array[String]): Unit = { val sparkConf = new SparkConf().setAppName("ShortTwitterAnalysis").setMaster("local[2]") val sc = new SparkContext(sparkConf) val text = sc.textFile("/home/tobby/data/shortTwitter.txt") val counts = text .map(_.toLowerCase) .map(_.toString) .map(_.replace("\t", "")) .map(_.replace("\"", "")) .map(_.replace("\n", "")) .map(_.replaceAll("[\\p{C}]", "")) .map(_.split("\"text\":\"")(1).split("\",\"source\":")(0)) counts.foreach(println) } } But the last map function .map(_.split("\"text\":\"")(1).split("\",\"source\":")(0)) does not work. Do you have any advice? Without the .map(_.split("\"text\":\"")(1).split("\",\"source\":")(0)) my tweets look like below : {created_at:wed jul 16 23:58:19 +0000 2014,id:489559687189110784,id_str:489559687189110784,text:a rose by any other name would smell as sweet,source:\u003ca href=\https:\/\/twitter.com\/download\/android\ rel=\nofollow\\u003etwitter for android\u003c\/a\u003e,truncated:false,in_reply_to_status_id:null,in_reply_to_status_id_str:null,in_reply_to_user_id:null,in_reply_to_user_id_str:null,in_reply_to_screen_name:null,user:{id:621244372,id_str:621244372,name:\u2665,screen_name:ivunia_ontrinae,location:,url:null,description:me myself & i \u2764,protected:false,verified:false,followers_count:1023,friends_count:591,listed_count:1,favourites_count:1909,statuses_count:26770,created_at:thu jun 28 19:23:06 +0000 2012,utc_offset:-10800,time_zone:atlantic time 
(canada),geo_enabled:true,lang:en,contributors_enabled:false,is_translator:false,profile_background_color:c0deed,profile_background_image_url:http:\/\/pbs.twimg.com\/profile_background_images\/378800000101658269\/ec0820565f0451a3ce7169c776fbe41f.jpeg,profile_background_image_url_https:https:\/\/pbs.twimg.com\/profile_background_images\/378800000101658269\/ec0820565f0451a3ce7169c776fbe41f.jpeg,profile_background_tile:true,profile_link_color:e62bb4,profile_sidebar_border_color:000000,profile_sidebar_fill_color:ddeef6,profile_text_color:333333,profile_use_background_image:true,profile_image_url:http:\/\/pbs.twimg.com\/profile_images\/483373612749959168\/f3qpy_66_normal.jpeg,profile_image_url_https:https:\/\/pbs.twimg.com\/profile_images\/483373612749959168\/f3qpy_66_normal.jpeg,profile_banner_url:https:\/\/pbs.twimg.com\/profile_banners\/621244372\/1404758956,default_profile:false,default_profile_image:false,following:null,follow_request_sent:null,notifications:null},geo:null,coordinates:null,place:null,contributors:null,retweet_count:0,favorite_count:0,entities:{hashtags:[],trends:[],urls:[],user_mentions:[],symbols:[]},favorited:false,retweeted:false,possibly_sensitive:false,filter_level:medium,lang:en} {created_at:wed jul 16 23:58:19 +0000 2014,id:489559687189110784,id_str:489559687189110784,text:a rose is a rose is a rose,source:\u003ca href=\https:\/\/twitter.com\/download\/android\ rel=\nofollow\\u003etwitter for android\u003c\/a\u003e,truncated:false,in_reply_to_status_id:null,in_reply_to_status_id_str:null,in_reply_to_user_id:null,in_reply_to_user_id_str:null,in_reply_to_screen_name:null,user:{id:621244372,id_str:621244372,name:\u2665,screen_name:ivunia_ontrinae,location:,url:null,description:me myself & i \u2764,protected:false,verified:false,followers_count:1023,friends_count:591,listed_count:1,favourites_count:1909,statuses_count:26770,created_at:thu jun 28 19:23:06 +0000 2012,utc_offset:-10800,time_zone:atlantic time 
(canada),geo_enabled:true,lang:en,contributors_enabled:false,is_translator:false,profile_background_color:c0deed,profile_background_image_url:http:\/\/pbs.twimg.com\/profile_background_images\/378800000101658269\/ec0820565f0451a3ce7169c776fbe41f.jpeg,profile_background_image_url_https:https:\/\/pbs.twimg.com\/profile_background_images\/378800000101658269\/ec0820565f0451a3ce7169c776fbe41f.jpeg,profile_background_tile:true,profile_link_color:e62bb4,profile_sidebar_border_color:000000,profile_sidebar_fill_color:ddeef6,profile_text_color:333333,profile_use_background_image:true,profile_image_url:http:\/\/pbs.twimg.com\/profile_images\/483373612749959168\/f3qpy_66_normal.jpeg,profile_image_url_https:https:\/\/pbs.twimg.com\/profile_images\/483373612749959168\/f3qpy_66_normal.jpeg,profile_banner_url:https:\/\/pbs.twimg.com\/profile_banners\/621244372\/1404758956,default_profile:false,default_profile_image:false,following:null,follow_request_sent:null,notifications:null},geo:null,coordinates:null,place:null,contributors:null,retweet_count:0,favorite_count:0,entities:{hashtags:[],trends:[],urls:[],user_mentions:[],symbols:[]},favorited:false,retweeted:false,possibly_sensitive:false,filter_level:medium,lang:en} {created_at:wed jul 16 23:58:19 +0000 2014,id:489559687176945664,id_str:489559687176945664,text:love is like a rose the joy of all the earth,source:\u003ca href=\http:\/\/twitter.com\/download\/iphone\ rel=\nofollow\\u003etwitter for iphone\u003c\/a\u003e,truncated:false,in_reply_to_status_id:null,in_reply_to_status_id_str:null,in_reply_to_user_id:null,in_reply_to_user_id_str:null,in_reply_to_screen_name:null,user:{id:363819213,id_str:363819213,name:ivanna010394,screen_name:ivannacarrillo,location:,url:null,description:null,protected:false,verified:false,followers_count:243,friends_count:530,listed_count:0,favourites_count:26,statuses_count:5672,created_at:sun aug 28 18:58:49 +0000 2011,utc_offset:-14400,time_zone:eastern time (us & 
canada),geo_enabled:false,lang:es,contributors_enabled:false,is_translator:false,profile_background_color:642d8b,profile_background_image_url:http:\/\/pbs.twimg.com\/profile_background_images\/767201253\/661eb2d4915e9ee6566647dcbaab0186.jpeg,profile_background_image_url_https:https:\/\/pbs.twimg.com\/profile_background_images\/767201253\/661eb2d4915e9ee6566647dcbaab0186.jpeg,profile_background_tile:true,profile_link_color:ff0000,profile_sidebar_border_color:ffffff,profile_sidebar_fill_color:7ac3ee,profile_text_color:3d1957,profile_use_background_image:true,profile_image_url:http:\/\/pbs.twimg.com\/profile_images\/455873054703648768\/_b4mf6o7_normal.jpeg,profile_image_url_https:https:\/\/pbs.twimg.com\/profile_images\/455873054703648768\/_b4mf6o7_normal.jpeg,profile_banner_url:https:\/\/pbs.twimg.com\/profile_banners\/363819213\/1402261141,default_profile:false,default_profile_image:false,following:null,follow_request_sent:null,notifications:null},geo:null,coordinates:null,place:null,contributors:null,retweeted_status:{created_at:wed jul 16 13:45:28 +0000 2014,id:489405458168709120,id_str:489405458168709120,text:our milan show is now sold out, thankyou :d tickets are still available for most of europe ! http:\/\/t.co\/arnh7pvoap http:\/\/t.co\/t5wzyocrtu,source:\u003ca href=\http:\/\/twitter.com\ rel=\nofollow\\u003etwitter web client\u003c\/a\u003e,truncated:false,in_reply_to_status_id:null,in_reply_to_status_id_str:null,in_reply_to_user_id:null,in_reply_to_user_id_str:null,in_reply_to_screen_name:null,user:{id:264107729,id_str:264107729,name:5 seconds of summer,screen_name:5sos,location:sydney, australia,url:http:\/\/www.facebook.com\/5secondsofsummer,description:4 aussies making music :) love the people who support us! 
our album is out :) http:\/\/po.st\/or93y4 | #ashton5sos #calum5sos #michael5sos #luke5sos,protected:false,verified:true,followers_count:3704204,friends_count:28660,listed_count:20024,favourites_count:1061,statuses_count:17297,created_at:fri mar 11 10:18:46 +0000 2011,utc_offset:36000,time_zone:sydney,geo_enabled:false,lang:en,contributors_enabled:false,is_translator:false,profile_background_color:000000,profile_background_image_url:http:\/\/pbs.twimg.com\/profile_background_images\/483531430371147778\/0gzkh2zi.jpeg,profile_background_image_url_https:https:\/\/pbs.twimg.com\/profile_background_images\/483531430371147778\/0gzkh2zi.jpeg,profile_background_tile:false,profile_link_color:c21b1b,profile_sidebar_border_color:ffffff,profile_sidebar_fill_color:ddeef6,profile_text_color:333333,profile_use_background_image:true,profile_image_url:http:\/\/pbs.twimg.com\/profile_images\/485730748574752768\/zm1ctcvv_normal.jpeg,profile_image_url_https:https:\/\/pbs.twimg.com\/profile_images\/485730748574752768\/zm1ctcvv_normal.jpeg,profile_banner_url:https:\/\/pbs.twimg.com\/profile_banners\/264107729\/1404117825,default_profile:false,default_profile_image:false,following:null,follow_request_sent:null,notifications:null},geo:null,coordinates:null,place:null,contributors:null,retweet_count:12648,favorite_count:31390,entities:{hashtags:[],trends:[],urls:[{url:http:\/\/t.co\/arnh7pvoap,expanded_url:http:\/\/5sos.com\/live,display_url:5sos.com\/live,indices:[93,115]}],user_mentions:[],symbols:[],media:[{id:489405457111715840,id_str:489405457111715840,indices:[116,138],media_url:http:\/\/pbs.twimg.com\/media\/bsq3q5zieaakbgg.jpg,media_url_https:https:\/\/pbs.twimg.com\/media\/bsq3q5zieaakbgg.jpg,url:http:\/\/t.co\/t5wzyocrtu,display_url:pic.twitter.com\/t5wzyocrtu,expanded_url:http:\/\/twitter.com\/5sos\/status\/489405458168709120\/photo\/1,type:photo,sizes:{small:{w:340,h:613,resize:fit},thumb:{w:150,h:150,resize:crop},medium:{w:600,h:1081,resize:fit},large:{w:811,h:1461,resize:fit}}
}]},favorited:false,retweeted:false,possibly_sensitive:false,filter_level:low,lang:en},retweet_count:0,favorite_count:0,entities:{hashtags:[],trends:[],urls:[{url:http:\/\/t.co\/arnh7pvoap,expanded_url:http:\/\/5sos.com\/live,display_url:5sos.com\/live,indices:[103,125]}],user_mentions:[{screen_name:5sos,name:5 seconds of summer,id:264107729,id_str:264107729,indices:[3,8]}],symbols:[],media:[{id:489405457111715840,id_str:489405457111715840,indices:[126,140],media_url:http:\/\/pbs.twimg.com\/media\/bsq3q5zieaakbgg.jpg,media_url_https:https:\/\/pbs.twimg.com\/media\/bsq3q5zieaakbgg.jpg,url:http:\/\/t.co\/t5wzyocrtu,display_url:pic.twitter.com\/t5wzyocrtu,expanded_url:http:\/\/twitter.com\/5sos\/status\/489405458168709120\/photo\/1,type:photo,sizes:{small:{w:340,h:613,resize:fit},thumb:{w:150,h:150,resize:crop},medium:{w:600,h:1081,resize:fit},large:{w:811,h:1461,resize:fit}},source_status_id:489405458168709120,source_status_id_str:489405458168709120}]},favorited:false,retweeted:false,possibly_sensitive:false,filter_level:medium,lang:en} {created_at:sat jan 16 12:00:47 +0000 2016,id:688330052233199616,id_str:688330052233199616,text:rt #nba2k: the battle of two young teams. tough season but one will emerge victorious. who will it be? lakers or 76ers? 
https:\/\/t.co\/nukkjq\u2026,source:\u003ca href=\http:\/\/twitter.com\ rel=\nofollow\\u003etwitter web client\u003c\/a\u003e,truncated:false,in_reply_to_status_id:null,in_reply_to_status_id_str:null,in_reply_to_user_id:null,in_reply_to_user_id_str:null,in_reply_to_screen_name:null,user:{id:4817727209,id_str:4817727209,name:mark lieyg,screen_name:_yungwiggins_,location:null,url:null,description:null,protected:false,verified:false,followers_count:3,friends_count:40,listed_count:0,favourites_count:0,statuses_count:39,created_at:sat jan 16 11:06:38 +0000 2016,utc_offset:-28800,time_zone:pacific time (us & canada),geo_enabled:false,lang:en,contributors_enabled:false,is_translator:false,profile_background_color:f5f8fa,profile_background_image_url:,profile_background_image_url_https:,profile_background_tile:false,profile_link_color:2b7bb9,profile_sidebar_border_color:c0deed,profile_sidebar_fill_color:ddeef6,profile_text_color:333333,profile_use_background_image:true,profile_image_url:http:\/\/abs.twimg.com\/sticky\/default_profile_images\/default_profile_1_normal.png,profile_image_url_https:https:\/\/abs.twimg.com\/sticky\/default_profile_images\/default_profile_1_normal.png,default_profile:true,default_profile_image:true,following:null,follow_request_sent:null,notifications:null},geo:null,coordinates:null,place:null,contributors:null,retweeted_status: {created_at:sat jan 02 03:31:10 +0000 2016,id:683128371627200513,id_str:683128371627200513,text:the battle of two young teams. tough season but one will emerge victorious. who will it be? lakers or 76ers? https:\/\/t.co\/nukkjqqspa,source:\u003ca href=\http:\/\/percolate.com\ rel=\nofollow\\u003epercolate\u003c\/a\u003e,truncated:false,in_reply_to_status_id:null,in_reply_to_status_id_str:null,in_reply_to_user_id:null,in_reply_to_user_id_str:null,in_reply_to_screen_name:null,user:{id:15573174,id_str:15573174,name:nba 2k 2k16,screen_name:nba2k,location:novato, ca,url:http:\/\/www.2k.com,description:esrb rating: everyone 10+. 
#nba2k16 available now for playstation 4 & xbox one, playstation 3 & xbox 360 & pc http:\/\/2kgam.es\/buynba2k16,protected:false,verified:true,followers_count:948071,friends_count:1630,listed_count:3305,favourites_count:10,statuses_count:8162,created_at:wed jul 23 21:57:14 +0000 2008,utc_offset:-28800,time_zone:pacific time (us & canada),geo_enabled:true,lang:en,contributors_enabled:false,is_translator:false,profile_background_color:000000,profile_background_image_url:http:\/\/pbs.twimg.com\/profile_background_images\/539865904528371712\/gnb-ggrq.png,profile_background_image_url_https:https:\/\/pbs.twimg.com\/profile_background_images\/539865904528371712\/gnb-ggrq.png,profile_background_tile:false,profile_link_color:ff0300,profile_sidebar_border_color:ffffff,profile_sidebar_fill_color:0d2b44,profile_text_color:408af2,profile_use_background_image:true,profile_image_url:http:\/\/pbs.twimg.com\/profile_images\/606562975109890048\/sumjozun_normal.jpg,profile_image_url_https:https:\/\/pbs.twimg.com\/profile_images\/606562975109890048\/sumjozun_normal.jpg,profile_banner_url:https:\/\/pbs.twimg.com\/profile_banners\/15573174\/1433457451,default_profile:false,default_profile_image:false,following:null,follow_request_sent:null,notifications:null},geo:null,coordinates:null,place:null,contributors:null,is_quote_status:false,retweet_count:112,favorite_count:547,entities:{hashtags:[],urls:[],user_mentions:[],symbols:[],media:[{id:683128370796736512,id_str:683128370796736512,indices:[109,132],media_url:http:\/\/pbs.twimg.com\/media\/cxr1okvusaamnu4.jpg,media_url_https:https:\/\/pbs.twimg.com\/media\/cxr1okvusaamnu4.jpg,url:https:\/\/t.co\/nukkjqqspa,display_url:pic.twitter.com\/nukkjqqspa,expanded_url:http:\/\/twitter.com\/nba2k\/status\/683128371627200513\/photo\/1,type:photo,sizes:{large:{w:1024,h:419,resize:fit},thumb:{w:150,h:150,resize:crop},medium:{w:600,h:245,resize:fit},small:{w:340,h:139,resize:fit}}}]},extended_entities:{media:[{id:683128370796736512,id_str:683128370796
736512,indices:[109,132],media_url:http:\/\/pbs.twimg.com\/media\/cxr1okvusaamnu4.jpg,media_url_https:https:\/\/pbs.twimg.com\/media\/cxr1okvusaamnu4.jpg,url:https:\/\/t.co\/nukkjqqspa,display_url:pic.twitter.com\/nukkjqqspa,expanded_url:http:\/\/twitter.com\/nba2k\/status\/683128371627200513\/photo\/1,type:photo,sizes:{large:{w:1024,h:419,resize:fit},thumb:{w:150,h:150,resize:crop},medium:{w:600,h:245,resize:fit},small:{w:340,h:139,resize:fit}}}]},favorited:false,retweeted:false,possibly_sensitive:false,filter_level:low,lang:en},is_quote_status:false,retweet_count:0,favorite_count:0,entities:{hashtags:[],urls:[],user_mentions:[{screen_name:nba2k,name:nba 2k 2k16,id:15573174,id_str:15573174,indices:[3,9]}],symbols:[],media:[{id:683128370796736512,id_str:683128370796736512,indices:[120,140],media_url:http:\/\/pbs.twimg.com\/media\/cxr1okvusaamnu4.jpg,media_url_https:https:\/\/pbs.twimg.com\/media\/cxr1okvusaamnu4.jpg,url:https:\/\/t.co\/nukkjqqspa,display_url:pic.twitter.com\/nukkjqqspa,expanded_url:http:\/\/twitter.com\/nba2k\/status\/683128371627200513\/photo\/1,type:photo,sizes:{large:{w:1024,h:419,resize:fit},thumb:{w:150,h:150,resize:crop},medium:{w:600,h:245,resize:fit},small:{w:340,h:139,resize:fit}},source_status_id:683128371627200513,source_status_id_str:683128371627200513,source_user_id:15573174,source_user_id_str:15573174}]},extended_entities:{media:[{id:683128370796736512,id_str:683128370796736512,indices:[120,140],media_url:http:\/\/pbs.twimg.com\/media\/cxr1okvusaamnu4.jpg,media_url_https:https:\/\/pbs.twimg.com\/media\/cxr1okvusaamnu4.jpg,url:https:\/\/t.co\/nukkjqqspa,display_url:pic.twitter.com\/nukkjqqspa,expanded_url:http:\/\/twitter.com\/nba2k\/status\/683128371627200513\/photo\/1,type:photo,sizes:{large:{w:1024,h:419,resize:fit},thumb:{w:150,h:150,resize:crop},medium:{w:600,h:245,resize:fit},small:{w:340,h:139,resize:fit}},source_status_id:683128371627200513,source_status_id_str:683128371627200513,source_user_id:15573174,source_user_id_str:155731
74}]},favorited:false,retweeted:false,possibly_sensitive:false,filter_level:low,lang:en,timestamp_ms:1452945647663} {created_at:wed jul 16 23:58:19 +0000 2014,id:489559687176945664,id_str:489559687176945664,text:at christmas i no more desire a rose than wish a snow in may’s new-fangled mirth,source:\u003ca href=\http:\/\/twitter.com\/download\/iphone\ rel=\nofollow\\u003etwitter for iphone\u003c\/a\u003e,truncated:false,in_reply_to_status_id:null,in_reply_to_status_id_str:null,in_reply_to_user_id:null,in_reply_to_user_id_str:null,in_reply_to_screen_name:null,user:{id:363819213,id_str:363819213,name:ivanna010394,screen_name:ivannacarrillo,location:,url:null,description:null,protected:false,verified:false,followers_count:243,friends_count:530,listed_count:0,favourites_count:26,statuses_count:5672,created_at:sun aug 28 18:58:49 +0000 2011,utc_offset:-14400,time_zone:eastern time (us & canada),geo_enabled:false,lang:es,contributors_enabled:false,is_translator:false,profile_background_color:642d8b,profile_background_image_url:http:\/\/pbs.twimg.com\/profile_background_images\/767201253\/661eb2d4915e9ee6566647dcbaab0186.jpeg,profile_background_image_url_https:https:\/\/pbs.twimg.com\/profile_background_images\/767201253\/661eb2d4915e9ee6566647dcbaab0186.jpeg,profile_background_tile:true,profile_link_color:ff0000,profile_sidebar_border_color:ffffff,profile_sidebar_fill_color:7ac3ee,profile_text_color:3d1957,profile_use_background_image:true,profile_image_url:http:\/\/pbs.twimg.com\/profile_images\/455873054703648768\/_b4mf6o7_normal.jpeg,profile_image_url_https:https:\/\/pbs.twimg.com\/profile_images\/455873054703648768\/_b4mf6o7_normal.jpeg,profile_banner_url:https:\/\/pbs.twimg.com\/profile_banners\/363819213\/1402261141,default_profile:false,default_profile_image:false,following:null,follow_request_sent:null,notifications:null},geo:null,coordinates:null,place:null,contributors:null,retweeted_status:{created_at:wed jul 16 13:45:28 +0000 
2014,id:489405458168709120,id_str:489405458168709120,text:our milan show is now sold out, thankyou :d tickets are still available for most of europe ! http:\/\/t.co\/arnh7pvoap http:\/\/t.co\/t5wzyocrtu,source:\u003ca href=\http:\/\/twitter.com\ rel=\nofollow\\u003etwitter web client\u003c\/a\u003e,truncated:false,in_reply_to_status_id:null,in_reply_to_status_id_str:null,in_reply_to_user_id:null,in_reply_to_user_id_str:null,in_reply_to_screen_name:null,user:{id:264107729,id_str:264107729,name:5 seconds of summer,screen_name:5sos,location:sydney, australia,url:http:\/\/www.facebook.com\/5secondsofsummer,description:4 aussies making music :) love the people who support us! our album is out :) http:\/\/po.st\/or93y4 | #ashton5sos #calum5sos #michael5sos #luke5sos,protected:false,verified:true,followers_count:3704204,friends_count:28660,listed_count:20024,favourites_count:1061,statuses_count:17297,created_at:fri mar 11 10:18:46 +0000 2011,utc_offset:36000,time_zone:sydney,geo_enabled:false,lang:en,contributors_enabled:false,is_translator:false,profile_background_color:000000,profile_background_image_url:http:\/\/pbs.twimg.com\/profile_background_images\/483531430371147778\/0gzkh2zi.jpeg,profile_background_image_url_https:https:\/\/pbs.twimg.com\/profile_background_images\/483531430371147778\/0gzkh2zi.jpeg,profile_background_tile:false,profile_link_color:c21b1b,profile_sidebar_border_color:ffffff,profile_sidebar_fill_color:ddeef6,profile_text_color:333333,profile_use_background_image:true,profile_image_url:http:\/\/pbs.twimg.com\/profile_images\/485730748574752768\/zm1ctcvv_normal.jpeg,profile_image_url_https:https:\/\/pbs.twimg.com\/profile_images\/485730748574752768\/zm1ctcvv_normal.jpeg,profile_banner_url:https:\/\/pbs.twimg.com\/profile_banners\/264107729\/1404117825,default_profile:false,default_profile_image:false,following:null,follow_request_sent:null,notifications:null},geo:null,coordinates:null,place:null,contributors:null,retweet_count:12648,favorite_count:313
90,entities:{hashtags:[],trends:[],urls:[{url:http:\/\/t.co\/arnh7pvoap,expanded_url:http:\/\/5sos.com\/live,display_url:5sos.com\/live,indices:[93,115]}],user_mentions:[],symbols:[],media:[{id:489405457111715840,id_str:489405457111715840,indices:[116,138],media_url:http:\/\/pbs.twimg.com\/media\/bsq3q5zieaakbgg.jpg,media_url_https:https:\/\/pbs.twimg.com\/media\/bsq3q5zieaakbgg.jpg,url:http:\/\/t.co\/t5wzyocrtu,display_url:pic.twitter.com\/t5wzyocrtu,expanded_url:http:\/\/twitter.com\/5sos\/status\/489405458168709120\/photo\/1,type:photo,sizes:{small:{w:340,h:613,resize:fit},thumb:{w:150,h:150,resize:crop},medium:{w:600,h:1081,resize:fit},large:{w:811,h:1461,resize:fit}}}]},favorited:false,retweeted:false,possibly_sensitive:false,filter_level:low,lang:en},retweet_count:0,favorite_count:0,entities:{hashtags:[],trends:[],urls:[{url:http:\/\/t.co\/arnh7pvoap,expanded_url:http:\/\/5sos.com\/live,display_url:5sos.com\/live,indices:[103,125]}],user_mentions:[{screen_name:5sos,name:5 seconds of summer,id:264107729,id_str:264107729,indices:[3,8]}],symbols:[],media:[{id:489405457111715840,id_str:489405457111715840,indices:[126,140],media_url:http:\/\/pbs.twimg.com\/media\/bsq3q5zieaakbgg.jpg,media_url_https:https:\/\/pbs.twimg.com\/media\/bsq3q5zieaakbgg.jpg,url:http:\/\/t.co\/t5wzyocrtu,display_url:pic.twitter.com\/t5wzyocrtu,expanded_url:http:\/\/twitter.com\/5sos\/status\/489405458168709120\/photo\/1,type:photo,sizes:{small:{w:340,h:613,resize:fit},thumb:{w:150,h:150,resize:crop},medium:{w:600,h:1081,resize:fit},large:{w:811,h:1461,resize:fit}},source_status_id:489405458168709120,source_status_id_str:489405458168709120}]},favorited:false,retweeted:false,possibly_sensitive:false,filter_level:medium,lang:en} {created_at:sat jan 16 12:00:48 +0000 2016,id:688330056410755072,id_str:688330056410755072,text:i was going to bake a cake and listen to the football. 
flour refund?,source:\u003ca href=\http:\/\/twitter.com\/download\/iphone\ rel=\nofollow\\u003etwitter for iphone\u003c\/a\u003e,truncated:false,in_reply_to_status_id:null,in_reply_to_status_id_str:null,in_reply_to_user_id:null,in_reply_to_user_id_str:null,in_reply_to_screen_name:null,user:{id:252303653,id_str:252303653,name:pete blackman,screen_name:peteblackman,location:null,url:null,description:null,protected:false,verified:false,followers_count:409,friends_count:903,listed_count:18,favourites_count:5664,statuses_count:22919,created_at:mon feb 14 22:44:37 +0000 2011,utc_offset:3600,time_zone:amsterdam,geo_enabled:false,lang:en,contributors_enabled:false,is_translator:false,profile_background_color:c0deed,profile_background_image_url:http:\/\/abs.twimg.com\/images\/themes\/theme1\/bg.png,profile_background_image_url_https:https:\/\/abs.twimg.com\/images\/themes\/theme1\/bg.png,profile_background_tile:false,profile_link_color:0084b4,profile_sidebar_border_color:c0deed,profile_sidebar_fill_color:ddeef6,profile_text_color:333333,profile_use_background_image:true,profile_image_url:http:\/\/pbs.twimg.com\/profile_images\/2600097910\/image_normal.jpg,profile_image_url_https:https:\/\/pbs.twimg.com\/profile_images\/2600097910\/image_normal.jpg,default_profile:true,default_profile_image:false,following:null,follow_request_sent:null,notifications:null},geo:null,coordinates:null,place:null,contributors:null,is_quote_status:false,retweet_count:0,favorite_count:0,entities:{hashtags:[],urls:[],user_mentions:[],symbols:[]},favorited:false,retweeted:false,filter_level:low,lang:en,timestamp_ms:1452945648659} Or is there any other way but using split? I would really appreciate your tips. The error is as below. 16/09/18 22:49:37 ERROR TaskSetManager: Task 0 in stage 0.0 failed 1 times; aborting job
Hi, I hope I understand the question correctly: you are attempting to read a file containing the text shown above and then print the "text" field of each JSON record in that file. If that assumption is correct, here is some simple code which would do this: val matchingPattern = "(?i)(text:)(.+?)(,source:)".r val tweets = scala.io.Source.fromPath("/home/tobby/data/shortTwitter.txt").getLines.reduceLeft(_+_) matchingPattern.findAllIn(tweets).matchData foreach { m => println(m.group(2)) } Hope it helps; if the above assumption is not correct, please provide a sample input and the expected output.
JSONDecodeError when iterating twitter data
I'm trying to iterate twitter data which is stored in a json file: fname = 'test.json' with open(fname, 'r') as f: for line in f: tweet = json.loads(line)['text'] print(tweet) It prints the first tweet in the file just fine but when it iterates for a second time it gives me a JSONDecodeError: JSONDecodeError: Expecting value: line 2 column 1 (char 1) My JSON file is 650Mb is size approximately. To get the twitter data I used the StreamListener from the Twitter API. Here is a glimpse into my JSON file: {"created_at":"Sun Apr 24 05:37:02 +0000 2016","id":724109877732204544,"id_str":"724109877732204544","text":"JONES RETURNS WITH A UNANIMOUS DECISION WIN IVER OVINCE SAINT PREUX! #UFC197 https:\/\/t.co\/KlfaAh9h21","source":"\u003ca href=\"http:\/\/instagram.com\" rel=\"nofollow\"\u003eInstagram\u003c\/a\u003e","truncated":false,"in_reply_to_status_id":null,"in_reply_to_status_id_str":null,"in_reply_to_user_id":null,"in_reply_to_user_id_str":null,"in_reply_to_screen_name":null,"user":{"id":714389668633116672,"id_str":"714389668633116672","name":"Leon Doyle","screen_name":"TheLDPodcast","location":"Dublin, Ireland","url":"http:\/\/www.youtube.com","description":"A weekly\/bi-weekly podcast focused mainly around MMA, Boxing, fighting etc. 
With the occasional random topic.","protected":false,"verified":false,"followers_count":7,"friends_count":59,"listed_count":0,"favourites_count":3,"statuses_count":31,"created_at":"Mon Mar 28 09:52:24 +0000 2016","utc_offset":null,"time_zone":null,"geo_enabled":false,"lang":"en","contributors_enabled":false,"is_translator":false,"profile_background_color":"000000","profile_background_image_url":"http:\/\/abs.twimg.com\/images\/themes\/theme1\/bg.png","profile_background_image_url_https":"https:\/\/abs.twimg.com\/images\/themes\/theme1\/bg.png","profile_background_tile":false,"profile_link_color":"004455","profile_sidebar_border_color":"000000","profile_sidebar_fill_color":"000000","profile_text_color":"000000","profile_use_background_image":false,"profile_image_url":"http:\/\/pbs.twimg.com\/profile_images\/714390864030797824\/REXXKCvs_normal.jpg","profile_image_url_https":"https:\/\/pbs.twimg.com\/profile_images\/714390864030797824\/REXXKCvs_normal.jpg","default_profile":false,"default_profile_image":false,"following":null,"follow_request_sent":null,"notifications":null},"geo":null,"coordinates":null,"place":null,"contributors":null,"is_quote_status":false,"retweet_count":0,"favorite_count":0,"entities":{"hashtags":[{"text":"UFC197","indices":[69,76]}],"urls":[{"url":"https:\/\/t.co\/KlfaAh9h21","expanded_url":"https:\/\/www.instagram.com\/p\/BEkk6Gewpqy\/","display_url":"instagram.com\/p\/BEkk6Gewpqy\/","indices":[77,100]}],"user_mentions":[],"symbols":[]},"favorited":false,"retweeted":false,"possibly_sensitive":false,"filter_level":"low","lang":"en","timestamp_ms":"1461476222819"} {"created_at":"Sun Apr 24 05:37:03 +0000 2016","id":724109879200366592,"id_str":"724109879200366592","text":"regrann from #ufc - #AndStill UFC flyweight champ #MightyMouseUFC! 
#UFC197\n\nPresented by\u2026 https:\/\/t.co\/zbE5CsFxMJ","source":"\u003ca href=\"http:\/\/instagram.com\" rel=\"nofollow\"\u003eInstagram\u003c\/a\u003e","truncated":false,"in_reply_to_status_id":null,"in_reply_to_status_id_str":null,"in_reply_to_user_id":null,"in_reply_to_user_id_str":null,"in_reply_to_screen_name":null,"user":{"id":1070221260,"id_str":"1070221260","name":"Will Manuel","screen_name":"TheWillManuel","location":"Kenai, AK","url":null,"description":"Alaskan. Paramedic. Firefighter. Industrial Security. Libertarian. 2nd Amendment. Liberty. BJJ & Muay Thai novice. #TeamRed #RedemptionMMA #BJJ #MuayThai #MMA","protected":false,"verified":false,"followers_count":437,"friends_count":573,"listed_count":32,"favourites_count":2516,"statuses_count":3184,"created_at":"Tue Jan 08 07:22:47 +0000 2013","utc_offset":-28800,"time_zone":"Alaska","geo_enabled":false,"lang":"en","contributors_enabled":false,"is_translator":false,"profile_background_color":"C0DEED","profile_background_image_url":"http:\/\/pbs.twimg.com\/profile_background_images\/579042288040435713\/VeA-zI45.jpeg","profile_background_image_url_https":"https:\/\/pbs.twimg.com\/profile_background_images\/579042288040435713\/VeA-zI45.jpeg","profile_background_tile":true,"profile_link_color":"4A913C","profile_sidebar_border_color":"FFFFFF","profile_sidebar_fill_color":"DDEEF6","profile_text_color":"333333","profile_use_background_image":true,"profile_image_url":"http:\/\/pbs.twimg.com\/profile_images\/715188796615237632\/JvxeLz8D_normal.jpg","profile_image_url_https":"https:\/\/pbs.twimg.com\/profile_images\/715188796615237632\/JvxeLz8D_normal.jpg","profile_banner_url":"https:\/\/pbs.twimg.com\/profile_banners\/1070221260\/1447179132","default_profile":false,"default_profile_image":false,"following":null,"follow_request_sent":null,"notifications":null},"geo":null,"coordinates":null,"place":null,"contributors":null,"is_quote_status":false,"retweet_count":0,"favorite_count":0,"entities":{"hashtags":[{"text"
:"AndStill","indices":[22,31]},{"text":"UFC197","indices":[69,76]}],"urls":[{"url":"https:\/\/t.co\/zbE5CsFxMJ","expanded_url":"https:\/\/www.instagram.com\/p\/BEkk6a0QMeX\/","display_url":"instagram.com\/p\/BEkk6a0QMeX\/","indices":[92,115]}],"user_mentions":[{"screen_name":"ufc","name":"#UFC197","id":6446742,"id_str":"6446742","indices":[13,17]},{"screen_name":"MightyMouseUFC","name":"Demetrious Johnson","id":140845817,"id_str":"140845817","indices":[52,67]}],"symbols":[]},"favorited":false,"retweeted":false,"possibly_sensitive":false,"filter_level":"low","lang":"en","timestamp_ms":"1461476223169"} {"created_at":"Sun Apr 24 05:37:03 +0000 2016","id":724109882341896192,"id_str":"724109882341896192","text":"RT #BESTFlGHTS: Jon Jones flips off Daniel Cormier at #UFC197 https:\/\/t.co\/S0pDvRWhfW","source":"\u003ca href=\"http:\/\/twitter.com\/download\/iphone\" rel=\"nofollow\"\u003eTwitter for iPhone\u003c\/a\u003e","truncated":false,"in_reply_to_status_id":null,"in_reply_to_status_id_str":null,"in_reply_to_user_id":null,"in_reply_to_user_id_str":null,"in_reply_to_screen_name":null,"user":{"id":1019191860,"id_str":"1019191860","name":"Paul","screen_name":"Paulie_Frat","location":"Mount Pocono, PA","url":null,"description":"...","protected":false,"verified":false,"followers_count":272,"friends_count":259,"listed_count":0,"favourites_count":1580,"statuses_count":1622,"created_at":"Tue Dec 18 07:10:12 +0000 2012","utc_offset":-14400,"time_zone":"Eastern Time (US & 
Canada)","geo_enabled":true,"lang":"en","contributors_enabled":false,"is_translator":false,"profile_background_color":"131516","profile_background_image_url":"http:\/\/abs.twimg.com\/images\/themes\/theme14\/bg.gif","profile_background_image_url_https":"https:\/\/abs.twimg.com\/images\/themes\/theme14\/bg.gif","profile_background_tile":true,"profile_link_color":"009999","profile_sidebar_border_color":"EEEEEE","profile_sidebar_fill_color":"EFEFEF","profile_text_color":"333333","profile_use_background_image":true,"profile_image_url":"http:\/\/pbs.twimg.com\/profile_images\/512140999444164608\/4H2fiOtg_normal.jpeg","profile_image_url_https":"https:\/\/pbs.twimg.com\/profile_images\/512140999444164608\/4H2fiOtg_normal.jpeg","profile_banner_url":"https:\/\/pbs.twimg.com\/profile_banners\/1019191860\/1461422809","default_profile":false,"default_profile_image":false,"following":null,"follow_request_sent":null,"notifications":null},"geo":null,"coordinates":null,"place":null,"contributors":null,"retweeted_status":{"created_at":"Sun Apr 24 05:12:13 +0000 2016","id":724103630702432256,"id_str":"724103630702432256","text":"Jon Jones flips off Daniel Cormier at #UFC197 https:\/\/t.co\/S0pDvRWhfW","source":"\u003ca href=\"http:\/\/bufferapp.com\" rel=\"nofollow\"\u003eBuffer\u003c\/a\u003e","truncated":false,"in_reply_to_status_id":null,"in_reply_to_status_id_str":null,"in_reply_to_user_id":null,"in_reply_to_user_id_str":null,"in_reply_to_screen_name":null,"user":{"id":1370712786,"id_str":"1370712786","name":"BEST FIGHTS","screen_name":"BESTFlGHTS","location":"MMA, Boxing, Street Fights","url":"http:\/\/snapchat.com\/add\/wshhfans","description":"Parody, we do not own the content posted DM's are open send me your fight","protected":false,"verified":false,"followers_count":156257,"friends_count":17861,"listed_count":83,"favourites_count":1,"statuses_count":6723,"created_at":"Sun Apr 21 22:43:19 +0000 
2013","utc_offset":-25200,"time_zone":"Arizona","geo_enabled":false,"lang":"en","contributors_enabled":false,"is_translator":false,"profile_background_color":"131516","profile_background_image_url":"http:\/\/abs.twimg.com\/images\/themes\/theme14\/bg.gif","profile_background_image_url_https":"https:\/\/abs.twimg.com\/images\/themes\/theme14\/bg.gif","profile_background_tile":true,"profile_link_color":"ABB8C2","profile_sidebar_border_color":"FFFFFF","profile_sidebar_fill_color":"EFEFEF","profile_text_color":"333333","profile_use_background_image":true,"profile_image_url":"http:\/\/pbs.twimg.com\/profile_images\/620356388833734657\/NvmkmGDk_normal.jpg","profile_image_url_https":"https:\/\/pbs.twimg.com\/profile_images\/620356388833734657\/NvmkmGDk_normal.jpg","profile_banner_url":"https:\/\/pbs.twimg.com\/profile_banners\/1370712786\/1460756748","default_profile":false,"default_profile_image":false,"following":null,"follow_request_sent":null,"notifications":null},"geo":null,"coordinates":null,"place":null,"contributors":null,"is_quote_status":false,"retweet_count":740,"favorite_count":624,"entities":{"hashtags":[{"text":"UFC197","indices":[38,45]}],"urls":[{"url":"https:\/\/t.co\/S0pDvRWhfW","expanded_url":"http:\/\/vine.co\/v\/iU5T53X6U7J","display_url":"vine.co\/v\/iU5T53X6U7J","indices":[46,69]}],"user_mentions":[],"symbols":[]},"favorited":false,"retweeted":false,"possibly_sensitive":false,"filter_level":"low","lang":"en"},"is_quote_status":false,"retweet_count":0,"favorite_count":0,"entities":{"hashtags":[{"text":"UFC197","indices":[54,61]}],"urls":[{"url":"https:\/\/t.co\/S0pDvRWhfW","expanded_url":"http:\/\/vine.co\/v\/iU5T53X6U7J","display_url":"vine.co\/v\/iU5T53X6U7J","indices":[62,85]}],"user_mentions":[{"screen_name":"BESTFlGHTS","name":"BEST FIGHTS","id":1370712786,"id_str":"1370712786","indices":[3,14]}],"symbols":[]},"favorited":false,"retweeted":false,"possibly_sensitive":false,"filter_level":"low","lang":"en","timestamp_ms":"1461476223918"} How can I 
solve this issue?
If your JSON file has exactly the same structure as the excerpt you posted, the empty lines between tweets are indeed what cause the JSONDecodeError: for a blank line, json.loads receives only a newline character, which contains no JSON value, hence "Expecting value: line 2 column 1 (char 1)". If that's the problem, just check that the line is not empty before processing it:

    with open(fname, 'r') as f:
        for line in f:
            if (not line.strip()):
                continue
            tweet = json.loads(line)['text']
            print(tweet)

Hope it helps.