I'm working with Twitter data which was fetched in JSONL form. I've converted it to JSON and am trying to convert it to a CSV (to import into a program which accepts either CSV or MySQL). However, some people put forced new lines into their tweets or bios. This is causing the CSV file to have multiple lines for some entries, often breaking up in the middle of a tweet. I've tried a few of the Python JSON-to-CSV scripts floating around on GitHub.
The latest attempt I tried:
jq -s "." tiny00subset.jsonl > tiny00subset.json
json2csv -i tiny00subset.json -o tiny00subset.csv
Partial example tweet (json format):
{
"created_at": "Mon Aug 13 10:40:34 +0000 2018",
"id": 1028954459110555600,
"id_str": "1028954459110555649",
"full_text": "Oh well, they deal with it quite well. Like they add numbers and facts and such crazy stuff.\nhttps://REPLACED/DuBGmHCnG8\n#climatechange https://REPLACED/d5IBchM3Uk",
"truncated": false,
"display_text_range": [
0,
131
],
"entities": {
"hashtags": [
{
"text": "climatechange",
"indices": [
117,
131
]
}
],
"symbols": [],
"user_mentions": [],
"urls": [
{
"url": "https://REPLACED/DuBGmHCnG8",
"expanded_url": "https://tamino.wordpress.com/2018/08/08/usa-temperature-can-i-sucker-you/",
"display_url": "tamino.wordpress.com/2018/08/08/usa…",
"indices": [
93,
116
]
},
{
"url": "https://REPLACED/d5IBchM3Uk",
"expanded_url": "https://twitter.com/Tony__Heller/status/1028672939753758720",
"display_url": "twitter.com/Tony__Heller/s…",
"indices": [
132,
155
]
}
]
}
}
CSV Output:
"Mon Aug 13 10:40:34 +0000 2018",1028954459110555600,"1028954459110555649","Oh well, they deal with it quite well. Like they add numbers and facts and such crazy stuff.
https://REPLACED/DuBGmHCnG8
#climatechange https://REPLACED/d5IBchM3Uk",false,"[0,131]","{""hashtags"":[{""text"":""climatechange"",""indices"":[117,131]}],""symbols"":[],""user_mentions"":[],""urls"":[{""url"":""https://REPLACED/DuBGmHCnG8"",""expanded_url"":""https://tamino.wordpress.com/2018/08/08/usa-temperature-can-i-sucker-you/"",""display_url"":""tamino.wordpress.com/2018/08/08/usa…"",""indices"":[93,116]},{""url"":""https://REPLACED/d5IBchM3Uk"",""expanded_url"":""https://twitter.com/Tony__Heller/status/1028672939753758720"",""display_url"":""twitter.com/Tony__Heller/s…"",""indices"":[132,155]}]}","TweetDeck",,,,,,"{""id"":59806323,""id_str"":""59806323"",""name"":""Daniel"",""screen_name"":""sleeksorrow"",""location"":""Karlsruhe, Germany"",""description"":""Politik, IT, Blödsinn und deren Schnittmenge. Ebenfalls: Hochmittelalter Darstellung, Falknerei, Greifvogelschutz - profile picture by #herrkausk"",""url"":""https://REPLACED/E8aNHIhCtg"",""entities"":{""url"":{""urls"":[{""url"":""https://REPLACED/E8aNHIhCtg"",""expanded_url"":""http://sleeksorrow.blogspot.com/"",""display_url"":""sleeksorrow.blogspot.com"",""indices"":[0,23]}]},""description"":{""urls"":[]}},""protected"":false,""followers_count"":572,""friends_count"":392,""listed_count"":47,""created_at"":""Fri Jul 24 15:15:25 +0000 
2009"",""favourites_count"":13259,""utc_offset"":null,""time_zone"":null,""geo_enabled"":false,""verified"":false,""statuses_count"":48861,""lang"":null,""contributors_enabled"":false,""is_translator"":false,""is_translation_enabled"":false,""profile_background_color"":""1A1B1F"",""profile_background_image_url"":""http://abs.twimg.com/images/themes/theme9/bg.gif"",""profile_background_image_url_https"":""https://abs.twimg.com/images/themes/theme9/bg.gif"",""profile_background_tile"":false,""profile_image_url"":""http://pbs.twimg.com/profile_images/877219681513480192/1rj4xqpK_normal.jpg"",""profile_image_url_https"":""https://pbs.twimg.com/profile_images/877219681513480192/1rj4xqpK_normal.jpg"",""profile_banner_url"":""https://pbs.twimg.com/profile_banners/59806323/1397029131"",""profile_image_extensions_alt_text"":null,""profile_banner_extensions_alt_text"":null,""profile_link_color"":""2FC2EF"",""profile_sidebar_border_color"":""181A1E"",""profile_sidebar_fill_color"":""252429"",""profile_text_color"":""666666"",""profile_use_background_image"":true,""has_extended_profile"":false,""default_profile"":false,""default_profile_image"":false,""can_media_tag"":true,""followed_by"":false,""following"":false,""follow_request_sent"":false,""notifications"":false,""translator_type"":""none""}",,,,,true,1028672939753758700,"1028672939753758720","{""url"":""https://REPLACED/d5IBchM3Uk"",""expanded"":""https://twitter.com/Tony__Heller/status/1028672939753758720"",""display"":""twitter.com/Tony__Heller/s…""}","{""created_at"":""Sun Aug 12 16:01:55 +0000 2018"",""id"":1028672939753758700,""id_str"":""1028672939753758720"",""full_text"":""#DeanFieldingF1 It is very difficult or impossible for climate alarmists to deal with reality. 
https://REPLACED/wOJTptxIqH"",""truncated"":false,""display_text_range"":[16,94],""entities"":{""hashtags"":[],""symbols"":[],""user_mentions"":[{""screen_name"":""DeanFieldingF1"",""name"":""Dean Fielding"",""id"":797295219825897500,""id_str"":""797295219825897472"",""indices"":[0,15]}],""urls"":[],""media"":[{""id"":1028672868849090600,""id_str"":""1028672868849090560"",""indices"":[95,118],""media_url"":""http://pbs.twimg.com/media/DkaUhinVAAARrIY.jpg"",""media_url_https"":""https://pbs.twimg.com/media/DkaUhinVAAARrIY.jpg"",""url"":""https://REPLACED/wOJTptxIqH"",""display_url"":""pic.twitter.com/wOJTptxIqH"",""expanded_url"":""https://twitter.com/SteveSGoddard/status/1028672939753758720/photo/1"",""type"":""photo"",""sizes"":{""thumb"":{""w"":150,""h"":150,""resize"":""crop""},""medium"":{""w"":1070,""h"":983,""resize"":""fit""},""large"":{""w"":1070,""h"":983,""resize"":""fit""},""small"":{""w"":680,""h"":625,""resize"":""fit""}},""features"":{""orig"":{""faces"":[]},""medium"":{""faces"":[]},""large"":{""faces"":[]},""small"":{""faces"":[]}}}]},""extended_entities"":{""media"":[{""id"":1028672868849090600,""id_str"":""1028672868849090560"",""indices"":[95,118],""media_url"":""http://pbs.twimg.com/media/DkaUhinVAAARrIY.jpg"",""media_url_https"":""https://pbs.twimg.com/media/DkaUhinVAAARrIY.jpg"",""url"":""https://REPLACED/wOJTptxIqH"",""display_url"":""pic.twitter.com/wOJTptxIqH"",""expanded_url"":""https://twitter.com/SteveSGoddard/status/1028672939753758720/photo/1"",""type"":""photo"",""sizes"":{""thumb"":{""w"":150,""h"":150,""resize"":""crop""},""medium"":{""w"":1070,""h"":983,""resize"":""fit""},""large"":{""w"":1070,""h"":983,""resize"":""fit""},""small"":{""w"":680,""h"":625,""resize"":""fit""}},""features"":{""orig"":{""faces"":[]},""medium"":{""faces"":[]},""large"":{""faces"":[]},""small"":{""faces"":[]}},""ext_alt_text"":null},{""id"":1028672883986333700,""id_str"":""1028672883986333697"",""indices"":[95,118],""media_url"":""http://pbs.twimg.com/med
ia/DkaUibAVAAEaQt0.jpg"",""media_url_https"":""https://pbs.twimg.com/media/DkaUibAVAAEaQt0.jpg"",""url"":""https://REPLACED/wOJTptxIqH"",""display_url"":""pic.twitter.com/wOJTptxIqH"",""expanded_url"":""https://twitter.com/SteveSGoddard/status/1028672939753758720/photo/1"",""type"":""photo"",""sizes"":{""thumb"":{""w"":150,""h"":150,""resize"":""crop""},""medium"":{""w"":1070,""h"":983,""resize"":""fit""},""large"":{""w"":1070,""h"":983,""resize"":""fit""},""small"":{""w"":680,""h"":625,""resize"":""fit""}},""features"":{""orig"":{""faces"":[]},""medium"":{""faces"":[]},""large"":{""faces"":[]},""small"":{""faces"":[]}},""ext_alt_text"":null}]},""source"":""Twitter Web Client"",""in_reply_to_status_id"":1028671170802081800,""in_reply_to_status_id_str"":""1028671170802081793"",""in_reply_to_user_id"":797295219825897500,""in_reply_to_user_id_str"":""797295219825897472"",""in_reply_to_screen_name"":""DeanFieldingF1"",""user"":{""id"":435704007,""id_str"":""435704007"",""name"":""Tony Heller"",""screen_name"":""Tony__Heller"",""location"":""Colorado"",""description"":""https://REPLACED/j5CaDNyIqE"",""url"":""https://REPLACED/Pyn117xXna"",""entities"":{""url"":{""urls"":[{""url"":""https://REPLACED/Pyn117xXna"",""expanded_url"":""http://realclimatescience.com"",""display_url"":""realclimatescience.com"",""indices"":[0,23]}]},""description"":{""urls"":[{""url"":""https://REPLACED/j5CaDNyIqE"",""expanded_url"":""https://realclimatescience.com/who-is-tony-heller/"",""display_url"":""realclimatescience.com/who-is-tony-he…"",""indices"":[0,23]}]}},""protected"":false,""followers_count"":44955,""friends_count"":374,""listed_count"":886,""created_at"":""Tue Dec 13 10:44:34 +0000 
2011"",""favourites_count"":3740,""utc_offset"":null,""time_zone"":null,""geo_enabled"":true,""verified"":false,""statuses_count"":165165,""lang"":null,""contributors_enabled"":false,""is_translator"":false,""is_translation_enabled"":false,""profile_background_color"":""185370"",""profile_background_image_url"":""http://abs.twimg.com/images/themes/theme1/bg.png"",""profile_background_image_url_https"":""https://abs.twimg.com/images/themes/theme1/bg.png"",""profile_background_tile"":false,""profile_image_url"":""http://pbs.twimg.com/profile_images/1175541923508916225/0qEi4yIj_normal.jpg"",""profile_image_url_https"":""https://pbs.twimg.com/profile_images/1175541923508916225/0qEi4yIj_normal.jpg"",""profile_banner_url"":""https://pbs.twimg.com/profile_banners/435704007/1469798959"",""profile_image_extensions_alt_text"":null,""profile_banner_extensions_alt_text"":null,""profile_link_color"":""0084B4"",""profile_sidebar_border_color"":""FFFFFF"",""profile_sidebar_fill_color"":""DDEEF6"",""profile_text_color"":""333333"",""profile_use_background_image"":true,""has_extended_profile"":false,""default_profile"":false,""default_profile_image"":false,""can_media_tag"":false,""followed_by"":false,""following"":false,""follow_request_sent"":false,""notifications"":false,""translator_type"":""none""},""geo"":null,""coordinates"":null,""place"":null,""contributors"":null,""is_quote_status"":false,""retweet_count"":16,""favorite_count"":27,""favorited"":false,""retweeted"":false,""possibly_sensitive"":false,""lang"":""en""}",0,0,false,false,false,"en"
starting from
{
"created_at": "Mon Aug 13 10:40:34 +0000 2018",
"id": 1028954459110555600,
"id_str": "1028954459110555649",
"full_text": "Oh well, they deal with it quite well. Like they add numbers and facts and such crazy stuff.\nhttps://REPLACED/DuBGmHCnG8\n#climatechange https://REPLACED/d5IBchM3Uk",
"truncated": false,
"display_text_range": [
0,
131
],
"entities": {
"hashtags": [
{
"text": "climatechange",
"indices": [
117,
131
]
}
],
"symbols": [],
"user_mentions": [],
"urls": [
{
"url": "https://REPLACED/DuBGmHCnG8",
"expanded_url": "https://tamino.wordpress.com/2018/08/08/usa-temperature-can-i-sucker-you/",
"display_url": "tamino.wordpress.com/2018/08/08/usa…",
"indices": [
93,
116
]
},
{
"url": "https://REPLACED/d5IBchM3Uk",
"expanded_url": "https://twitter.com/Tony__Heller/status/1028672939753758720",
"display_url": "twitter.com/Tony__Heller/s…",
"indices": [
132,
155
]
}
]
}
}
and running Miller (https://github.com/johnkerl/miller)
mlr --j2c unsparsify input.json >input.csv
you get this kind of output: https://gist.github.com/aborruso/6e0361923a3c45b9fe55ebf7590953de#file-output-csv
If you open it as raw, you can see the carriage return, and a spreadsheet reads it properly.
So, if you properly use the import process you need, the \n is not a problem.
When searching for a particular address by postal code (2000), the API does not return the expected formatted address, whereas searching by the address (frederiksberg) instead of the postal code returns the expected formatted address correctly.
https://maps.googleapis.com/maps/api/geocode/json?address=2000&components=country:Denmark&key=apikey
https://maps.googleapis.com/maps/api/geocode/json?address=frederiksberg&components=country:Denmark&key=apikey
But a similar search with the address set to postal code (4000) returns the expected formatted address correctly; the URL is below.
https://maps.googleapis.com/maps/api/geocode/json?address=4000&components=country:Denmark&key=apikey
Please let us know why it's not returning the expected formatted address when the address is set to 2000.
TIA!
We experienced a similar issue with postcode 2000 in Australia (which is Sydney): the request returned OK but with Zero_Result. All the other postcodes worked fine except 2000.
Our initial search was
geocode({
address: "2000 Australia",
region: "AU" })
We solved it by adding "Postcode" before the search term:
geocode({
address: "Postcode 2000 Australia",
region: "AU" })
The Zip code 2000 matches more than one address. You can get the most common ones this way:
https://geocode.xyz/2000?region=DK
output:
Denmark x,y z: 55.68132,12.52966
🇩🇰
3 Solbjerg Plads, Frederiksberg C, Denmark » Confidence Score: 0.5
Frederiksberg DK 2000 Denmark
Frederiksberg Kommune DK 2000 Denmark
Brønshøj DK 2000 Denmark
Bronshoj DK 2000 Denmark
Frederiksberg C DK 2000 Denmark
Or in json format:
https://geocode.xyz/2000?region=DK&json=1
Json Output:
{
"standard": {
"addresst": "3 Solbjerg Plads",
"stnumber": "3",
"prov": "DK",
"city": "Frederiksberg C",
"countryname": "Denmark",
"postal": "2000",
"confidence": "0.5"
},
"longt": "12.52966",
"alt": {
"loc": [
{
"longt": "12.51635",
"city": "Frederiksberg",
"cc": "6353",
"latt": "55.68239"
},
{
"longt": "12.51704",
"city": "Frederiksberg Kommune",
"cc": "5629",
"latt": "55.68255"
},
{
"longt": "12.50332",
"city": "Bronshoj",
"cc": "2",
"latt": "55.67113"
},
{
"longt": "12.50332",
"city": "Bronshoj",
"cc": "2",
"latt": "55.67113"
},
{
"longt": "12.52966",
"city": "Frederiksberg C",
"cc": "1",
"latt": "55.68132"
}
]
},
"latt": "55.68132"
}