I'm trying to create a Jupyter Notebook project using VSCode. In this project I'm using the Foursquare API to query venues around Seattle. However, I've run into a little problem when the notebook outputs the JSON response: when I set the LIMIT parameter to more than 20 venues, I only get the second part of the JSON response.
Let me just be clear.
This is only happening with VScode. When I input the same lines of code in a Jupyter Notebook using the browser and setting the parameter LIMIT to more than 20, I get the full JSON file response.
Below is the code sample used in VSCode:
Imported libraries:
import pandas as pd
import requests
import json
from geopy.geocoders import Nominatim
from pandas.io.json import json_normalize
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
GEOPY.GEOCODERS to retrieve Seattle coordinates.
# Resolve the address to coordinates with the Nominatim geocoder and show them.
address = 'Seattle, WA'
geolocator = Nominatim(user_agent="foursquare")
location = geolocator.geocode(address)
latitude, longitude = location.latitude, location.longitude
print(latitude, longitude)
Variables with credentials and parameters for the API request.(Note how the limit is set to 21)
# Credentials and search options that are substituted into the request URL.
CLIENT_ID = 'client_id' # your Foursquare ID
CLIENT_SECRET = 'client_secret' # your Foursquare Secret
VERSION = '20180604'  # sent as the `v` query parameter
LIMIT = 21  # sent as `limit`; above 20 the displayed output is cut off
RADIUS = 500  # sent as `radius` (presumably metres; confirm in the API docs)
URL for the API request.
# Venue-search URL; the placeholders are filled positionally with the
# credentials, the coordinates and the search options.
url = (
    'https://api.foursquare.com/v2/venues/search'
    '?client_id={}&client_secret={}'
    '&ll={},{}&v={}&radius={}&limit={}'
).format(CLIENT_ID, CLIENT_SECRET, latitude, longitude, VERSION, RADIUS, LIMIT)
The API request.
# Send the request and decode the JSON body. An explicit timeout is passed
# because requests otherwise waits indefinitely on a stalled connection.
results = requests.get(url, timeout=30).json()
results
The undesirable output: (Notice how the metadata and the first part of the file are missing)
['600 4th Ave (5th & Cherry)',
'Seattle, WA 98104',
'United States']},
'categories': [{'id': '4bf58dd8d48988d129941735',
'name': 'City Hall',
'pluralName': 'City Halls',
'shortName': 'City Hall',
'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/building/cityhall_',
'suffix': '.png'},
'primary': True}],
'referralId': 'v-1617749016',
'hasPerk': False},
{'id': '4b49098df964a520286326e3',
'name': 'Seattle City Hall',
'location': {'address': '600 4th Ave',
'crossStreet': 'btwn Cherry & James',
'lat': 47.60391791602839,
'lng': -122.32999464587043,
'labeledLatLngs': [{'label': 'display',
'lat': 47.60391791602839,
'lng': -122.32999464587043}],
'distance': 10,
'postalCode': '98104',
'cc': 'US',
'city': 'Seattle',
'state': 'WA',
'country': 'United States',
'formattedAddress': ['600 4th Ave (btwn Cherry & James)',
'Seattle, WA 98104',
'United States']},
'categories': [{'id': '4bf58dd8d48988d129941735',
'name': 'City Hall',
'pluralName': 'City Halls',
'shortName': 'City Hall',
'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/building/cityhall_',
'suffix': '.png'},
'primary': True}],
'referralId': 'v-1617749016',
'hasPerk': False},
{'id': '4e44510fe4cd394059e89099',
'name': 'Karr Tuttle Campbell',
'location': {'address': '701 5th Ave',
'lat': 47.60440702245942,
'lng': -122.33136024826479,
'labeledLatLngs': [{'label': 'display',
'lat': 47.60440702245942,
'lng': -122.33136024826479}],
'distance': 116,
'postalCode': '98104',
'cc': 'US',
'city': 'Seattle',
'state': 'WA',
'country': 'United States',
'formattedAddress': ['701 5th Ave',
'Seattle, WA 98104',
'United States']},
'categories': [{'id': '4bf58dd8d48988d124941735',
'name': 'Office',
'pluralName': 'Offices',
'shortName': 'Office',
'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/building/default_',
'suffix': '.png'},
'primary': True}],
'referralId': 'v-1617749016',
'hasPerk': False},
{'id': '4c3f616ed691c9b6d8a6890a',
'name': 'City Hall Plaza',
'location': {'address': '600 4th Ave',
'crossStreet': '4th & Cherry',
'lat': 47.60378595075962,
'lng': -122.33051066366723,
'labeledLatLngs': [{'label': 'display',
'lat': 47.60378595075962,
'lng': -122.33051066366723}],
'distance': 34,
'postalCode': '98104',
'cc': 'US',
'city': 'Seattle',
'state': 'WA',
'country': 'United States',
'formattedAddress': ['600 4th Ave (4th & Cherry)',
'Seattle, WA 98104',
'United States']},
'categories': [{'id': '4bf58dd8d48988d129941735',
'name': 'City Hall',
'pluralName': 'City Halls',
'shortName': 'City Hall',
'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/building/cityhall_',
'suffix': '.png'},
'primary': True}],
'referralId': 'v-1617749016',
'hasPerk': False},
{'id': '4ddbc954091aae6b185e2968',
'name': 'Seattle Municipal Tower 44th Floor',
'location': {'address': '700 5th Ave',
'crossStreet': 'Cherry Street',
'lat': 47.60509704657094,
'lng': -122.3301267150704,
'labeledLatLngs': [{'label': 'display',
'lat': 47.60509704657094,
'lng': -122.3301267150704}],
'distance': 140,
'postalCode': '98104',
'cc': 'US',
'city': 'Seattle',
'state': 'WA',
'country': 'United States',
'formattedAddress': ['700 5th Ave (Cherry Street)',
'Seattle, WA 98104',
'United States']},
'categories': [{'id': '4bf58dd8d48988d124941735',
'name': 'Office',
'pluralName': 'Offices',
'shortName': 'Office',
'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/building/default_',
'suffix': '.png'},
'primary': True}],
'referralId': 'v-1617749016',
'hasPerk': False},
{'id': '598b268efc9e9467c3d48589',
'name': 'Bertha Knight Landes Conference Room',
'location': {'lat': 47.603764,
'lng': -122.32945,
'labeledLatLngs': [{'label': 'display',
'lat': 47.603764,
'lng': -122.32945}],
'distance': 46,
'postalCode': '98104',
'cc': 'US',
'city': 'Seattle',
'state': 'WA',
'country': 'United States',
'formattedAddress': ['Seattle, WA 98104', 'United States']},
'categories': [{'id': '4bf58dd8d48988d127941735',
'name': 'Conference Room',
'pluralName': 'Conference Rooms',
'shortName': 'Conference room',
'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/building/office_conferenceroom_',
'suffix': '.png'},
'primary': True}],
'referralId': 'v-1617749016',
'hasPerk': False},
{'id': '4a9e9437f964a5207c3a20e3',
'name': 'Einstein Bros Bagels',
'location': {'address': '600 4th Ave',
'lat': 47.60389534060459,
'lng': -122.33065690674596,
'labeledLatLngs': [{'label': 'display',
'lat': 47.60389534060459,
'lng': -122.33065690674596}],
'distance': 45,
'postalCode': '98104',
'cc': 'US',
'city': 'Seattle',
'state': 'WA',
'country': 'United States',
'formattedAddress': ['600 4th Ave',
'Seattle, WA 98104',
'United States']},
'categories': [{'id': '4bf58dd8d48988d179941735',
'name': 'Bagel Shop',
'pluralName': 'Bagel Shops',
'shortName': 'Bagels',
'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/food/bagels_',
'suffix': '.png'},
'primary': True}],
'referralId': 'v-1617749016',
'hasPerk': False},
{'id': '427ab380f964a52096211fe3',
'name': 'Columbia Center',
'location': {'address': '701 5th Ave',
'crossStreet': 'at Columbia St',
'lat': 47.60452412230289,
'lng': -122.33075151763909,
'labeledLatLngs': [{'label': 'display',
'lat': 47.60452412230289,
'lng': -122.33075151763909},
{'label': 'entrance', 'lat': 47.604432, 'lng': -122.330763}],
'distance': 92,
'postalCode': '98104',
'cc': 'US',
'city': 'Seattle',
'state': 'WA',
'country': 'United States',
'formattedAddress': ['701 5th Ave (at Columbia St)',
'Seattle, WA 98104',
'United States']},
'categories': [{'id': '4bf58dd8d48988d130941735',
'name': 'Building',
'pluralName': 'Buildings',
'shortName': 'Building',
'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/building/default_',
'suffix': '.png'},
'primary': True}],
'referralId': 'v-1617749016',
'hasPerk': False}
........ CONTINUED RESULTS INTENTIONALLY DELETED
Running the code with LIMIT parameter set to 20 which gives me the desired output.
# Same credentials and search options as before, with the limit lowered.
CLIENT_ID = 'client_id'
CLIENT_SECRET = 'client_secret'
VERSION = '20180604'  # sent as the `v` query parameter
LIMIT = 20  # sent as `limit`; with 20 the full response is displayed
RADIUS = 500  # sent as `radius` (presumably metres; confirm in the API docs)
Again, the url for the request. (No changes here)
# Venue-search URL; the placeholders are filled positionally with the
# credentials, the coordinates and the search options.
url = (
    'https://api.foursquare.com/v2/venues/search'
    '?client_id={}&client_secret={}'
    '&ll={},{}&v={}&radius={}&limit={}'
).format(CLIENT_ID, CLIENT_SECRET, latitude, longitude, VERSION, RADIUS, LIMIT)
url
One more time, the API request.(No changes)
# Send the request and decode the JSON body. An explicit timeout is passed
# because requests otherwise waits indefinitely on a stalled connection.
results = requests.get(url, timeout=30).json()
results
Finally the desired output with the full JSON file:
{'meta': {'code': 200, 'requestId': '606ce68a749e75020fe96e3a'},
'response': {'venues': [{'id': '4c3b9d165810a593aff7ba3c',
'name': 'City Council Chambers',
'location': {'address': '600 4th Ave',
'crossStreet': '5th & Cherry',
'lat': 47.603861440975066,
'lng': -122.33006802191612,
'labeledLatLngs': [{'label': 'display',
'lat': 47.603861440975066,
'lng': -122.33006802191612},
{'label': 'entrance', 'lat': 47.603626, 'lng': -122.329618}],
'distance': 3,
'postalCode': '98104',
'cc': 'US',
'city': 'Seattle',
'state': 'WA',
'country': 'United States',
'formattedAddress': ['600 4th Ave (5th & Cherry)',
'Seattle, WA 98104',
'United States']},
'categories': [{'id': '4bf58dd8d48988d129941735',
'name': 'City Hall',
'pluralName': 'City Halls',
'shortName': 'City Hall',
'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/building/cityhall_',
'suffix': '.png'},
'primary': True}],
'referralId': 'v-1617749643',
'hasPerk': False},
{'id': '4b49098df964a520286326e3',
'name': 'Seattle City Hall',
'location': {'address': '600 4th Ave',
'crossStreet': 'btwn Cherry & James',
'lat': 47.60391791602839,
'lng': -122.32999464587043,
'labeledLatLngs': [{'label': 'display',
'lat': 47.60391791602839,
'lng': -122.32999464587043}],
'distance': 10,
'postalCode': '98104',
'cc': 'US',
'city': 'Seattle',
'state': 'WA',
'country': 'United States',
'formattedAddress': ['600 4th Ave (btwn Cherry & James)',
'Seattle, WA 98104',
'United States']},
'categories': [{'id': '4bf58dd8d48988d129941735',
'name': 'City Hall',
'pluralName': 'City Halls',
'shortName': 'City Hall',
'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/building/cityhall_',
'suffix': '.png'},
'primary': True}],
'referralId': 'v-1617749643',
'hasPerk': False},
............ CONTINUED RESULTS INTENTIONALLY DELETED
Progress update.
Apparently I'm fetching the full response file but VSCode is not displaying the whole output.
Below is sample code from the same request with the LIMIT parameter set to 30. The evidence that the full file is being fetched but not displayed is that I can slice the result to see only the first 20 venues.
Somehow I can even increase it to 22. Yet if I pass this threshold the problem reappears again.
Input:
results['response']['venues'][0:22]
Output:
[{'id': '4c3b9d165810a593aff7ba3c',
'name': 'City Council Chambers',
'location': {'address': '600 4th Ave',
'crossStreet': '5th & Cherry',
'lat': 47.603861440975066,
'lng': -122.33006802191612,
'labeledLatLngs': [{'label': 'display',
'lat': 47.603861440975066,
'lng': -122.33006802191612},
{'label': 'entrance', 'lat': 47.603626, 'lng': -122.329618}],
'distance': 3,
'postalCode': '98104',
'cc': 'US',
'city': 'Seattle',
'state': 'WA',
'country': 'United States',
'formattedAddress': ['600 4th Ave (5th & Cherry)',
'Seattle, WA 98104',
'United States']},
'categories': [{'id': '4bf58dd8d48988d129941735',
'name': 'City Hall',
'pluralName': 'City Halls',
'shortName': 'City Hall',
'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/building/cityhall_',
'suffix': '.png'},
'primary': True}],
'referralId': 'v-1617761189',
'hasPerk': False},
{'id': '4b49098df964a520286326e3',
'name': 'Seattle City Hall',
'location': {'address': '600 4th Ave',
'crossStreet': 'btwn Cherry & James',
'lat': 47.60391791602839,
'lng': -122.32999464587043,
'labeledLatLngs': [{'label': 'display',
'lat': 47.60391791602839,
'lng': -122.32999464587043}],
'distance': 10,
'postalCode': '98104',
'cc': 'US',
'city': 'Seattle',
'state': 'WA',
'country': 'United States',
'formattedAddress': ['600 4th Ave (btwn Cherry & James)',
'Seattle, WA 98104',
'United States']},
'categories': [{'id': '4bf58dd8d48988d129941735',
'name': 'City Hall',
'pluralName': 'City Halls',
'shortName': 'City Hall',
'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/building/cityhall_',
'suffix': '.png'},
'primary': True}],
'referralId': 'v-1617761189',
'hasPerk': False},
{'id': '4e44510fe4cd394059e89099',
'name': 'Karr Tuttle Campbell',
'location': {'address': '701 5th Ave',
'lat': 47.60440702245942,
'lng': -122.33136024826479,
'labeledLatLngs': [{'label': 'display',
'lat': 47.60440702245942,
'lng': -122.33136024826479}],
'distance': 116,
'postalCode': '98104',
'cc': 'US',
'city': 'Seattle',
'state': 'WA',
'country': 'United States',
'formattedAddress': ['701 5th Ave', 'Seattle, WA 98104', 'United States']},
'categories': [{'id': '4bf58dd8d48988d124941735',
'name': 'Office',
'pluralName': 'Offices',
'shortName': 'Office',
'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/building/default_',
'suffix': '.png'},
'primary': True}],
'referralId': 'v-1617761189',
'hasPerk': False}
It looks like I found the solution to my problem.
Thank you everyone for all your diligent attention in this serious matter.
Here are the instructions:
Go to VScode Settings > In the search box type: 'text output' > Set the text output limit to 0
I hope this helps the next rookie in line.
Related
Edit: sample json of details column:
{6591: '[]',
8112: "[{'name': 'start', 'time': 1659453223851}, {'name': 'arrival', 'time': 1659454209024, 'location': [-73.7895605, 40.6869539]}, {'name': 'departure', 'time': 1659453289013, 'location': [-73.8124575, 40.7091602]}]",
5674: '[]',
4236: '[]',
3148: "[{'name': 'start', 'time': 1659121571280}, {'name': 'arrival', 'time': 1659122768105, 'location': [-74.220351348, 40.748419051]}, {'name': 'departure', 'time': 1659121605076, 'location': [-74.189452444, 40.715865856]}]",
3408: "[{'name': 'start', 'time': 1659113772531}, {'name': 'arrival', 'time': 1659114170204, 'location': [-73.9469142, 40.671488]}, {'name': 'departure', 'time': 1659113832693, 'location': [-73.956379, 40.6669802]}]",
1438: '[]',
3634: '[]',
5060: "[{'name': 'start', 'time': 1659190337964}, {'name': 'arrival', 'time': 1659190367182, 'location': [-76.614058283, 39.292697049]}, {'name': 'departure', 'time': 1659190345722, 'location': [-76.614058283, 39.292697049]}]",
6614: '[]',
7313: '[]',
7653: '[]',
9446: '[]',
1237: '[]',
6974: "[{'name': 'start', 'time': 1659383554887}, {'name': 'adminCompletion', 'time': 1659386192031, 'data': {'adminId': 'ZFQCAL6aeS', 'sendNotificationFromAdminComplete': False}}, {'name': 'arrival', 'time': 1659385764198, 'location': [-73.943001009, 40.705886527]}, {'name': 'departure', 'time': 1659383653199, 'location': [-73.94038015, 40.814893186]}]",
762: '[]',
4843: '[]',
8682: '[]',
7271: '[]',
4672: "[{'name': 'start', 'time': 1659131562088}, {'name': 'arrival', 'time': 1659131937387, 'location': [-87.62621, 41.9015626]}, {'name': 'departure', 'time': 1659131637316, 'location': [-87.6263294, 41.9094856]}]"}
I have a dataframe columns like 'details' and 'id'. It looks like this. I want to completely flatten details column.
details id
[{'name': 'start', 'time': 1659479418}, {'name': 'arrival', 'time': 1659452651073, 'location': [-75.040536278, 40.034055]}, {'name': 'departure', 'time': 1659451650, 'location': [-75.1609003, 39.947729034]}] 1
[] 2
[] 3
[{'name': 'start', 'time': 1659126581459}, {'name': 'arrival', 'time': 1659128206850, 'location': [-80.3165751, 25.8625698]}, {'name': 'departure', 'time': 1659126641679, 'location': [-80.2511886, 25.921769]}] 4
[{'name': 'start', 'time': 1659120813100}, {'name': 'arrival', 'time': 1659121980125, 'location': [-76.642292, 39.307895253]}, {'name': 'departure', 'time': 1659120903093, 'location': [-76.741190426, 39.34240617]}] 5
[] 6
[] 7
[{'name': 'start', 'time': 1659217203753}, {'name': 'adminCompletion', 'time': 1659217336224, 'data': {'adminId': '~R~WZt7bKO979BRTqHyarS2p', 'sendNotification': False}}, {'name': 'arrival', 'time': 1659217308939, 'location': [-73.941830752, 40.702405857]}, {'name': 'departure', 'time': 1659217288936, 'location': [-73.941830752, 40.702405857]}] 8
[{'name': 'start', 'time': 1659189824814}, {'name': 'arrival', 'time': 1659191937100, 'location': [-76.406627, 39.984]}, {'name': 'departure', 'time': 1659189915191, 'location': [-76.614515552, 39.292407218]}] 9
[] 10
what is expected from this is:
start_time admincompletiontime adminId sendnotification arrival_time arrival_location departure_time departure_location id
1659479418 1.65945E+12 [-75.040536278, 40.034055] 1659451650 [-75.1609003, 39.947729034] 1
2
3
1.65913E+12 1.65913E+12 [-80.3165751, 25.8625698] 1.65913E+12 [-80.2511886, 25.921769] 4
1.65922E+12 1.65922E+12 ~R~WZt7bKO979BRTqHyarS2p FALSE 1.65922E+12 [-73.941830752, 40.702405857] 1.65922E+12 [-73.941830752, 40.702405857] 8
I want to extract all the columns that are passed as values. pd.json_normalize() did not work for me in this case. please suggest.
Your data is pretty scuffed, you need to clean it up, but following a pattern like this should start you in the right direction:
from ast import literal_eval

# Each value of `data` is the string repr of a list of event dicts; turn it
# back into Python objects (literal_eval only evaluates literals).
data = {key: literal_eval(value) for key, value in data.items()}

# Collapse every non-empty record into ONE dict keyed by event name so that
# json_normalize emits exactly one row per record. Normalizing each event on
# its own and back-filling the gaps would pull arrival/departure values from
# the following records into the wrong rows.
data = [
    {y['name']: {'time': y['time'], 'location': y.get('location')} for y in x}
    for x in data.values()
    if x
]
df = pd.json_normalize(data)

# Drop columns that are empty for every record (e.g. events that never carry
# a location). An event that a record lacks stays NaN in that record's row.
df = df.dropna(how='all', axis=1).reset_index(drop=True)
print(df)
Output:
start.time arrival.time arrival.location departure.time departure.location adminCompletion.time
0 1.659453e+12 1.659454e+12 [-73.7895605, 40.6869539] 1.659453e+12 [-73.8124575, 40.7091602] 1.659386e+12
1 1.659122e+12 1.659454e+12 [-73.7895605, 40.6869539] 1.659453e+12 [-73.8124575, 40.7091602] 1.659386e+12
2 1.659114e+12 1.659123e+12 [-74.220351348, 40.748419051] 1.659122e+12 [-74.189452444, 40.715865856] 1.659386e+12
3 1.659190e+12 1.659114e+12 [-73.9469142, 40.671488] 1.659114e+12 [-73.956379, 40.6669802] 1.659386e+12
4 1.659384e+12 1.659190e+12 [-76.614058283, 39.292697049] 1.659190e+12 [-76.614058283, 39.292697049] 1.659386e+12
5 1.659132e+12 1.659386e+12 [-73.943001009, 40.705886527] 1.659384e+12 [-73.94038015, 40.814893186] 1.659386e+12
I have a json string stored in a field in BigQuery which has this structure::
{'language': 'Eng', 'date_started': '2021-02-08 16: 56: 55 GMT', 'link_id': '111', 'url_variables': {'touchpoint': {'key': 'touchpoint', 'value': 'phone', 'type': 'url'
}, 'interaction_id': {'key': 'interaction_id', 'value': '111', 'type': 'url'
}
}, 'ip_address': None, 'referer': '', 'user_agent': None, 'response_time': 111, 'data_quality': [], 'longitude': '', 'latitude': '', 'country': '', 'city': '', 'region': '', 'postal': '', 'dma': '', 'survey_data': {'25': {'id': 25, 'type': 'TEXTBOX', 'question': 'feedback_source', 'section_id': 1, 'shown': False
}, '229': {'id': 229, 'type': 'TEXTBOX', 'question': 'recruitment_method', 'section_id': 1, 'shown': False
}, '227': {'id': 227, 'type': 'TEXTBOX', 'question': 'meeting_point', 'section_id': 1, 'answer': 'phone', 'shown': True
}, '221': {'id': 221, 'type': 'TEXTBOX', 'question': 'interaction_id', 'section_id': 1, 'answer': '222', 'shown': True
}, '217': {'id': 217, 'type': 'TEXTBOX', 'question': 'session_id', 'section_id': 1, 'answer': '333', 'shown': True
}, '231': {'id': 231, 'type': 'ESSAY', 'question': 'BlaBla question 4', 'section_id': 3, 'answer': 'Bla Bla answer', 'shown': True
}, '255': {'id': 255, 'type': 'TEXTBOX', 'question': 'tz_offset', 'section_id': 3, 'answer': '-120', 'shown': True
}, '77': {'id': 77, 'type': 'parent', 'question': 'Bla Bla 1', 'section_id': 35, 'options': {'10395': {'id': 10395, 'option': 'Neutraal', 'answer': '3'
}
}, 'shown': True
}, '250': {'id': 250, 'type': 'RADIO', 'question': 'Bla Bla?', 'section_id': 66, 'original_answer': '1', 'answer': '1', 'answer_id': 10860, 'shown': True
}, '251': {'id': 251, 'type': 'RADIO', 'question': 'Bla Bla', 'section_id': 66, 'original_answer': '0', 'answer': '0', 'answer_id': 10863, 'shown': True
}
}
}
I'm able to extract some of the values with the query below, but I cannot extract response_time or any of the values inside the survey_data structure.
They always come out as null.
DECLARE resp STRING
DEFAULT "{'id': '111', 'contact_id': '', 'status': 'Complete', 'is_test_data': '0', 'date_submitted': '2021-07-08 17: 02: 16 GMT', 'session_id': '111', 'language': 'Eng', 'date_started': '2021-02-08 16: 56: 55 GMT', 'link_id': '111', 'url_variables': {'touchpoint': {'key': 'touchpoint', 'value': 'phone', 'type': 'url' }, 'interaction_id': {'key': 'interaction_id', 'value': '111', 'type': 'url' } }, 'ip_address': None, 'referer': '', 'user_agent': None, 'response_time': 111, 'data_quality': [], 'longitude': '', 'latitude': '', 'country': '', 'city': '', 'region': '', 'postal': '', 'dma': '', 'survey_data': {'25': {'id': 25, 'type': 'TEXTBOX', 'question': 'feedback_source', 'section_id': 1, 'shown': False }, '229': {'id': 229, 'type': 'TEXTBOX', 'question': 'recruitment_method', 'section_id': 1, 'shown': False }, '227': {'id': 227, 'type': 'TEXTBOX', 'question': 'meeting_point', 'section_id': 1, 'answer': 'phone', 'shown': True }, '221': {'id': 221, 'type': 'TEXTBOX', 'question': 'interaction_id', 'section_id': 1, 'answer': '222', 'shown': True }, '217': {'id': 217, 'type': 'TEXTBOX', 'question': 'session_id', 'section_id': 1, 'answer': '333', 'shown': True }, '231': {'id': 231, 'type': 'ESSAY', 'question': 'BlaBla question 4', 'section_id': 3, 'answer': 'Bla Bla answer', 'shown': True }, '255': {'id': 255, 'type': 'TEXTBOX', 'question': 'tz_offset', 'section_id': 3, 'answer': '-120', 'shown': True }, '77': {'id': 77, 'type': 'parent', 'question': 'Bla Bla 1', 'section_id': 35, 'options': {'10395': {'id': 10395, 'option': 'Neutraal', 'answer': '3' } }, 'shown': True }, '250': {'id': 250, 'type': 'RADIO', 'question': 'Bla Bla?', 'section_id': 66, 'original_answer': '1', 'answer': '1', 'answer_id': 10860, 'shown': True }, '251': {'id': 251, 'type': 'RADIO', 'question': 'Bla Bla', 'section_id': 66, 'original_answer': '0', 'answer': '0', 'answer_id': 10863, 'shown': True } } }";
-- Extract individual fields from the script variable resp declared above.
-- NOTE(review): resp is a Python dict repr (single quotes, None/True/False),
-- not strict JSON; that is presumably why the two JSON_QUERY calls return
-- NULL. Confirm against the BigQuery JSON function docs.
SELECT
JSON_VALUE( resp, '$.url_variables.interaction_id.value') as url_interaction_id_value ,
JSON_VALUE( resp, '$.url_variables.interaction_id.type') as url_interaction_id_type,
JSON_VALUE( resp, '$.language') as language,
JSON_QUERY( resp, '$.response_time') as response_time, -- NOT WORKING
JSON_QUERY( resp, '$.survey_data') as survey_data -- NOT WORKING
I tried with jq in bash from the CLI and it seems to complain about the fact that some of the None values are not quoted.
Question:
Does it mean that BigQuery attempts to extract values from the JSON string as far as it can, "until" it encounters something that is not well formatted (e.g. the unquoted None values), and then it just cannot parse further and returns nulls?
NB: In another app, I have been able to parse the json file in Python and extract values from inside the json string.
It looks like you have a few formatting issues in your resp field, which you can fix with a few REPLACEs, as in the example below:
-- Extract fields from resp after the subquery has rewritten the Python
-- literals (None / True / False) in it.
SELECT
JSON_VALUE( resp, '$.url_variables.interaction_id.value') as url_interaction_id_value ,
JSON_VALUE( resp, '$.url_variables.interaction_id.type') as url_interaction_id_type,
JSON_VALUE( resp, '$.language') as language,
JSON_QUERY( resp, '$.response_time') as response_time, -- WORKING NOW
JSON_QUERY( resp, '$.survey_data') as survey_data -- WORKING NOW,
FROM (
-- NOTE(review): these are plain substring replacements. Only a None that is
-- directly followed by a comma is rewritten (to the string 'None', not to
-- JSON null), and "True"/"False" inside string values would be changed too.
SELECT REPLACE(REPLACE(REPLACE(resp, "None,", "'None',"), "True", "true"), "False", "false") as resp
FROM `project.dataset.table`
)
if applied to sample data in your question - now it gets you all you need
I have a JSON string that returns device info and, if devices are found, the devices will be listed as device0, device1, device2, etc. In the simple code below, how can I discover all devices found in the JSON and then print the info below for each device? I currently look up each device statically and I want this discovery to be dynamic and print the results for each one found.
# Fetch the device report and decode it into a dict.
r1 = requests.get(url=url_api, params=PARAMS)
devicedata = r1.json()

# Only 'device0' is looked up here; any other device in the payload is ignored.
if 'device0' in devicedata:
    device0 = devicedata['device0']
    print('')
    device0Name = device0['device_name']
    print(device0Name)
    # The first entry of 'obs' holds the readings that get printed.
    reading = device0['obs'][0]
    print('Temp: {}'.format(reading['ambient_temp']))
    print('Probe Temp: {}'.format(reading['probe_temp']))
    print('Humidity: {}%'.format(reading['humidity']))
    print('')
# JSON info looks like this...
{'device0': {'success': True, 'device_type': 'TX60', 'obs': [{'device_id': '1111', 'device_type': 'TX60', 'u_timestamp': '1580361017', 'ambient_temp': '45.7', 'probe_temp': '45.5', 'humidity': '82', 'linkquality': '100', 'lowbattery': '0', 'success': '9', 's_interval': '99', 'timestamp': '1/29/2020 11:10 PM', 'utctime': 1580361017}], 'alerts': {'miss': {'id': '520831', 'alert_type': 'miss', 's_id': '1111', 'max': '-100', 'min': '30', 'wet': '0', 'alert_id': '1', 'phone': 'yes', 'email': '', 'state': None}, 'batt': {'id': '520832', 'alert_type': 'batt', 's_id': '1111', 'max': '-100', 'min': '-100', 'wet': '0', 'alert_id': '1', 'phone': 'yes', 'email': '', 'state': None}}, 'ispws': 0, 'unit': {'temp': '°F', 'temp2': '°F', 'rh': '%'}, 'device_id': '1111', 'expired': '0', 'interval': '30', 'reg_date': '2020-01-17 22:06:48', 'create_date': 1579298808, 'device_name': 'Back Yard', 'assocGateway': '1', 'problem': False}, 'device1': {'success': True, 'device_type': 'TX60', 'obs': [{'device_id': '2222', 'device_type': 'TX60', 'u_timestamp': '1580360303', 'ambient_temp': '63.6', 'probe_temp': 'N/C', 'humidity': '64', 'linkquality': '100', 'lowbattery': '0', 'success': '9', 's_interval': '99', 'timestamp': '1/29/2020 10:58 PM', 'utctime': 1580360303}], 'alerts': {'miss': {'id': '520220', 'alert_type': 'miss', 's_id': '2222', 'max': '-100', 'min': '30', 'wet': '0', 'alert_id': '1', 'phone': 'yes', 'email': '', 'state': None}, 'batt': {'id': '520221', 'alert_type': 'batt', 's_id': '2222', 'max': '-100', 'min': '-100', 'wet': '0', 'alert_id': '1', 'phone': 'yes', 'email': '', 'state': None}}, 'ispws': 0, 'unit': {'temp': '°F', 'temp2': '°F', 'rh': '%'}, 'device_id': '3333', 'expired': '1', 'interval': '30', 'reg_date': '2016-03-19 01:45:04', 'create_date': 1500868369, 'device_name': 'Crawl Space', 'assocGateway': '1', 'problem': False}, 'device2': {'success': True, 'device_type': 'TX60', 'obs': [{'device_id': '3333', 'device_type': 'TX60', 'u_timestamp': '1580360195', 
'ambient_temp': '70.2', 'probe_temp': 'N/C', 'humidity': '48', 'linkquality': '100', 'lowbattery': '0', 'success': '9', 's_interval': '99', 'timestamp': '1/29/2020 10:56 PM', 'utctime': 1580360195}], 'alerts': None, 'ispws': 0, 'unit': {'temp': '°F', 'temp2': '°F', 'rh': '%'}, 'device_id': '3333', 'expired': '0', 'interval': '15', 'reg_date': '2020-01-30 04:34:00', 'create_date': 1580358840, 'device_name': 'Basement', 'assocGateway': '2', 'problem': False}, 'tz': 'America/Chicago'}
The output for a single device looks like this..
Back Yard
Temp: 50.9
Probe Temp: 51.2
Humidity: 92%
Crawl Space
Temp: 65.4
Probe Temp: N/C
Humidity: 55%
Basement
Temp: 70
Probe Temp: N/C
Humidity: 48%
Found it.
# Report every top-level entry whose key contains "device"; other entries
# (such as 'tz' in the sample payload) are skipped.
for key, device in devicedata.items():
    if "device" not in key:
        continue
    # Read every field before printing, so a missing key fails before any
    # partial output for this device.
    label = device["device_name"]
    latest = device["obs"][0]
    ambient = latest["ambient_temp"]
    probe = latest["probe_temp"]
    rel_humidity = latest["humidity"]
    print(label)
    print('Temp: {}'.format(ambient))
    print('Probe Temp: {}'.format(probe))
    print('Humidity: {}%'.format(rel_humidity))
    print('')
Using Python 3.5, I'm trying to return data from the Todoist REST API, which is in JSON format.
[{'id': 2577166691, 'project_id': 2181643136, 'url': 'https://todoist.com/showTask?id=2577166691', 'completed': False, 'order': 2, 'content': 'soon', 'priority': 1, 'comment_count': 0, 'due': {'recurring': False, 'date': '2018-04-01', 'timezone': 'UTC+10:00', 'datetime': '2018-04-01T10:00:00Z', 'string': 'Mar 31 2019'}, 'indent': 1}, {'id': 2577166849, 'project_id': 2181643136, 'url': 'https://todoist.com/showTask?id=2577166849', 'completed': False, 'order': 3, 'content': 'To City +1', 'priority': 1, 'comment_count': 0, 'due': {'recurring': False, 'date': '2018-03-31', 'string': 'Mar 31'}, 'indent': 1}, {'id': 2577225965, 'project_id': 2181643136, 'url': 'https://todoist.com/showTask?id=2577225965', 'completed': False, 'order': 4, 'content': 'To City +2', 'priority': 1, 'comment_count': 0, 'indent': 1}, {'id': 2577974095, 'project_id': 2181643136, 'url': 'https://todoist.com/showTask?id=2577974095', 'completed': False, 'order': 5, 'content': 'To City +3', 'priority': 1, 'comment_count': 0, 'indent': 1}, {'id': 2577974970, 'project_id': 2181643136, 'url': 'https://todoist.com/showTask?id=2577974970', 'completed': False, 'order': 6, 'content': 'Next train from City', 'priority': 1, 'comment_count': 0, 'indent': 1}, {'id': 2577975012, 'project_id': 2181643136, 'url': 'https://todoist.com/showTask?id=2577975012', 'completed': False, 'order': 7, 'content': 'From City +1', 'priority': 1, 'comment_count': 0, 'indent': 1}, {'id': 2577975101, 'project_id': 2181643136, 'url': 'https://todoist.com/showTask?id=2577975101', 'completed': False, 'order': 8, 'content': 'From City +2', 'priority': 1, 'comment_count': 0, 'indent': 1}, {'id': 2577975145, 'project_id': 2181643136, 'url': 'https://todoist.com/showTask?id=2577975145', 'completed': False, 'order': 9, 'content': 'From City +3', 'priority': 1, 'comment_count': 0, 'indent': 1}]
I can correctly obtain data for all items, eg
# Top-level keys such as 'id' are present on every task in the sample.
print(json_tasks[0]['id'])
2577166691
And it also works for
print(json_tasks[0]['due']['recurring'])
False
print(json_tasks[0]['due']['date'])
2018-04-01
But:
print(json_tasks[0]['due']['datetime'])
KeyError: 'datetime'
I have tried a number of things but I'm stumped. What am I doing wrong? How can I get it to recognise 'datetime' as a key?
The code below, when I ran it, printed out 2018-04-01T10:00:00Z.
json_tasks = [{'id': 2577166691, 'project_id': 2181643136, 'url': 'https://todoist.com/showTask?id=2577166691', 'completed': False, 'order': 2, 'content': 'soon', 'priority': 1, 'comment_count': 0, 'due': {'recurring': False, 'date': '2018-04-01', 'timezone': 'UTC+10:00', 'datetime': '2018-04-01T10:00:00Z', 'string': 'Mar 31 2019'}, 'indent': 1}, {'id': 2577166849, 'project_id': 2181643136, 'url': 'https://todoist.com/showTask?id=2577166849', 'completed': False, 'order': 3, 'content': 'To City +1', 'priority': 1, 'comment_count': 0, 'due': {'recurring': False, 'date': '2018-03-31', 'string': 'Mar 31'}, 'indent': 1}, {'id': 2577225965, 'project_id': 2181643136, 'url': 'https://todoist.com/showTask?id=2577225965', 'completed': False, 'order': 4, 'content': 'To City +2', 'priority': 1, 'comment_count': 0, 'indent': 1}, {'id': 2577974095, 'project_id': 2181643136, 'url': 'https://todoist.com/showTask?id=2577974095', 'completed': False, 'order': 5, 'content': 'To City +3', 'priority': 1, 'comment_count': 0, 'indent': 1}, {'id': 2577974970, 'project_id': 2181643136, 'url': 'https://todoist.com/showTask?id=2577974970', 'completed': False, 'order': 6, 'content': 'Next train from City', 'priority': 1, 'comment_count': 0, 'indent': 1}, {'id': 2577975012, 'project_id': 2181643136, 'url': 'https://todoist.com/showTask?id=2577975012', 'completed': False, 'order': 7, 'content': 'From City +1', 'priority': 1, 'comment_count': 0, 'indent': 1}, {'id': 2577975101, 'project_id': 2181643136, 'url': 'https://todoist.com/showTask?id=2577975101', 'completed': False, 'order': 8, 'content': 'From City +2', 'priority': 1, 'comment_count': 0, 'indent': 1}, {'id': 2577975145, 'project_id': 2181643136, 'url': 'https://todoist.com/showTask?id=2577975145', 'completed': False, 'order': 9, 'content': 'From City +3', 'priority': 1, 'comment_count': 0, 'indent': 1}]
print(json_tasks[0]['due']['datetime'])
I'm trying to iterate through a JSON response from a page using js2xml.
The question I have, is how do I call the 'stores' node and pass only that as my response? The JSON looks like this:
<script>
window.appData = {
"ressSize": "large",
"cssPath": "http://css.bbystatic.com/",
"imgPath": "http://images.bbystatic.com/",
"jsPath": "http://js.bbystatic.com/",
"bbyDomain": "http://www.bestbuy.com/",
"bbySslDomain": "https://www-ssl.bestbuy.com/",
"isUserLoggedIn": false,
"zipCode": "46801",
"stores": [{
"id": "2727",
"name": "GLENBROOK SQUARE",
"addr1": "4201 coldwater rd",
"addr2": "spc g10",
"city": "fort wayne",
"state": "IN",
"country": "US",
"zipCode": "46805",
"phone": "260-482-5230"...
</script>
My spider for this is straight forward but I can't seem to come up with what I need to parse the 9th node 'stores'. This is what I've got so far:
def parse(self, response):
    """Convert the page's inline ``window.appData`` object into a Python dict.

    The first ``<script>`` whose text mentions ``window.appData`` is parsed
    with js2xml, and the right-hand side of the ``appData`` assignment is
    converted with ``js2xml.make_dict``.

    :param response: response object exposing ``.xpath()`` over the page.
    :returns: the converted ``appData`` value, or ``None`` when the page has
        no such script or no matching assignment.
    """
    js = response.xpath('//script[contains(.,"window.appData")]/text()').extract_first()
    if js is None:
        # extract_first() found no matching script; there is nothing to parse.
        return None
    jstree = js2xml.parse(js)
    # XPath attribute tests are spelled "@name"; "#name" is not valid XPath.
    # Evaluate the expression once and reuse the result.
    app_data_nodes = jstree.xpath('//assign[left//identifier[@name="appData"]]/right/*')
    if not app_data_nodes:
        return None
    return js2xml.make_dict(app_data_nodes[0])
The response to this gives me:
<program>
<assign operator="=">
<left>
<dotaccessor>
<object>
<identifier name="window"/>
</object>
<property>
<identifier name="appData"/>
</property>
</dotaccessor>
</left>
<right>
<object>
<property name="ressSize">
<string>large</string>
</property>
<property name="cssPath">
<string>http://css.bbystatic.com/</string>
</property>
<property name="imgPath">
<string>http://images.bbystatic.com/</string>
</property>
<property name="jsPath">
<string>http://js.bbystatic.com/</string>
</property>
<property name="bbyDomain">
<string>http://www.bestbuy.com/</string>
</property>
<property name="bbySslDomain">
<string>https://www-ssl.bestbuy.com/</string>
</property>
<property name="isUserLoggedIn">
<boolean>false</boolean>
</property>
<property name="zipCode">
<string></string>
</property>
<property name="stores">
<array/>
</property>
<property name="preferredStores">
<array/>
</property>
</object>
</right>
</assign>
</program>
{'bbyDomain': 'http://www.bestbuy.com/',
'bbySslDomain': 'https://www-ssl.bestbuy.com/',
'cssPath': 'http://css.bbystatic.com/',
'imgPath': 'http://images.bbystatic.com/',
'isUserLoggedIn': False,
'jsPath': 'http://js.bbystatic.com/',
'preferredStores': [],
'ressSize': 'large',
'stores': [],
'zipCode': ''}
Any thoughts would be helpful!
Let's use New York as the location: http://www.bestbuy.com/site/store-locator/11356
$ scrapy shell http://www.bestbuy.com/site/store-locator/11356
2016-10-10 16:19:07 [scrapy] INFO: Scrapy 1.2.0 started (bot: scrapybot)
(...)
2016-10-10 16:19:08 [scrapy] DEBUG: Crawled (200) <GET http://www.bestbuy.com/site/store-locator/11356> (referer: None)
>>> js = response.xpath('//script[contains(.,"window.appData")]/text()').extract_first()
>>> js[:100]
u'window.appData = {"ressSize":"large","cssPath":"http://css.bbystatic.com/","imgPath":"http://images.'
>>>
>>> jstree = js2xml.parse(js)
>>> app_data_node = jstree.xpath('//assign[left//identifier[@name="appData"]]/right/*')[0]
>>> app_data = js2xml.make_dict(app_data_node)
>>> app_data.keys()
['ressSize', 'isUserLoggedIn', 'preferredStores', 'jsPath', 'bbyDomain', 'bbySslDomain', 'zipCode', 'imgPath', 'cssPath', 'stores']
>>> len(app_data['stores'])
25
So you have 25 stores for New York. You can simply loop on app_data["stores"].
>>> from pprint import pprint
>>> for store in app_data['stores']:
... pprint(store)
...
{'addPreferredStoreLink': '/site/store-locator/preferred/1115',
'addr1': '13107 40th rd',
'addr2': 'ste c300',
'city': 'flushing',
'country': 'US',
'hours': [{'close': '20:00', 'date': '2016-10-09', 'open': '11:00'},
{'close': '21:00',
'closeTime': '9:00 PM',
'date': '2016-10-10',
'open': '10:00',
'openTime': '10:00 AM'},
{'close': '21:00', 'date': '2016-10-11', 'open': '10:00'},
{'close': '21:00', 'date': '2016-10-12', 'open': '10:00'},
{'close': '21:00', 'date': '2016-10-13', 'open': '10:00'},
{'close': '22:00', 'date': '2016-10-14', 'open': '10:00'},
{'close': '22:00', 'date': '2016-10-15', 'open': '10:00'},
{'close': '20:00', 'date': '2016-10-16', 'open': '11:00'},
{'close': '21:00', 'date': '2016-10-17', 'open': '10:00'},
{'close': '21:00', 'date': '2016-10-18', 'open': '10:00'},
{'close': '21:00', 'date': '2016-10-19', 'open': '10:00'},
{'close': '21:00', 'date': '2016-10-20', 'open': '10:00'},
{'close': '22:00', 'date': '2016-10-21', 'open': '10:00'},
{'close': '22:00', 'date': '2016-10-22', 'open': '10:00'}],
'hoursDisplay': {'close': '21:00',
'closeTime': '9:00 PM',
'date': '2016-10-10',
'open': '10:00',
'openTime': '10:00 AM'},
'id': '1115',
'isPreferredStore': False,
'latitude': '40.75662',
'locationSubType': 'Big Box Store',
'locationType': 'Store',
'longitude': '-73.83698',
'name': 'FLUSHING NY',
'phone': '718-888-3629',
'removePreferredStoreLink': '/site/store-locator/preferred/1115',
'services': ['Geek Squad Services',
'Best Buy Mobile',
'Best Buy For Business',
'Apple Shop',
'Electronics Recycling',
u'Hablamos Espa\xf1ol',
'Car & GPS Installation Services',
'Samsung Experience Shop',
'Windows Store'],
'state': 'NY',
'zipCode': '11354'}
(...)
{'addPreferredStoreLink': '/site/store-locator/preferred/374',
'addr1': '2478 central park ave',
'city': 'yonkers',
'country': 'US',
'hours': [{'close': '20:00', 'date': '2016-10-09', 'open': '11:00'},
{'close': '21:00',
'closeTime': '9:00 PM',
'date': '2016-10-10',
'open': '10:00',
'openTime': '10:00 AM'},
{'close': '21:00', 'date': '2016-10-11', 'open': '10:00'},
{'close': '21:00', 'date': '2016-10-12', 'open': '10:00'},
{'close': '21:00', 'date': '2016-10-13', 'open': '10:00'},
{'close': '21:00', 'date': '2016-10-14', 'open': '10:00'},
{'close': '21:00', 'date': '2016-10-15', 'open': '10:00'},
{'close': '20:00', 'date': '2016-10-16', 'open': '11:00'},
{'close': '21:00', 'date': '2016-10-17', 'open': '10:00'},
{'close': '21:00', 'date': '2016-10-18', 'open': '10:00'},
{'close': '21:00', 'date': '2016-10-19', 'open': '10:00'},
{'close': '21:00', 'date': '2016-10-20', 'open': '10:00'},
{'close': '21:00', 'date': '2016-10-21', 'open': '10:00'},
{'close': '21:00', 'date': '2016-10-22', 'open': '10:00'}],
'hoursDisplay': {'close': '21:00',
'closeTime': '9:00 PM',
'date': '2016-10-10',
'open': '10:00',
'openTime': '10:00 AM'},
'id': '374',
'isPreferredStore': False,
'latitude': '40.9814',
'locationSubType': 'Big Box Store',
'locationType': 'Store',
'longitude': '-73.8277',
'name': 'YONKERS NY',
'phone': '914-337-4077',
'removePreferredStoreLink': '/site/store-locator/preferred/374',
'services': ['Windows Store',
'Geek Squad Services',
'Best Buy Mobile',
'Best Buy For Business',
'Apple Shop',
'Electronics Recycling',
u'Hablamos Espa\xf1ol',
'Samsung Experience',
'LG Experience ',
'Sony Experience ',
'Car & GPS Installation Services'],
'state': 'NY',
'zipCode': '10710'}
>>>
In your Scrapy callback, you can translate this like this:
def parse(self, response):
    """Yield each entry of the ``stores`` array in the page's ``window.appData``.

    The first ``<script>`` whose text mentions ``window.appData`` is parsed
    with js2xml, the right-hand side of the ``appData`` assignment is
    converted with ``js2xml.make_dict``, and every element of its
    ``'stores'`` entry is yielded in order.

    :param response: response object exposing ``.xpath()`` over the page.
    :returns: a generator of store entries; it yields nothing when the script,
        the assignment or the ``'stores'`` key is missing.
    """
    js = response.xpath('//script[contains(.,"window.appData")]/text()').extract_first()
    if js is None:
        # extract_first() found no matching script; there is nothing to parse.
        return
    jstree = js2xml.parse(js)
    # XPath attribute tests are spelled "@name"; "#name" is not valid XPath.
    app_data_nodes = jstree.xpath('//assign[left//identifier[@name="appData"]]/right/*')
    if not app_data_nodes:
        return
    app_data = js2xml.make_dict(app_data_nodes[0])
    for store in app_data.get('stores', []):
        yield store