Receive IPs from JSON file

I am trying to get all IPs from a JSON file using Python 2.7.5, but I cannot manage to do it correctly.
Does someone have advice on how I can get all IPs from 'addressPrefixes' into a txt file?
Here is the code I already have to download the JSON file:
import urllib
import json

testfile = urllib.URLopener()
testfile.retrieve("https://download.microsoft.com/download/7/1/D/71D86715-5596-4529-9B13-DA13A5DE5B63/ServiceTags_Public_20210426.json", "AzureIPs.json")
print("---SUCCESSFULLY RECEIVED MICROSOFT AZURE IPS---")

with open('AzureIPs.json', 'r') as f:
    data = json.load(f)
The JSON file contains many IPs and IP ranges and looks like this:
{
"changeNumber": 145,
"cloud": "Public",
"values": [
{
"name": "ActionGroup",
"id": "ActionGroup",
"properties": {
"changeNumber": 9,
"region": "",
"regionId": 0,
"platform": "Azure",
"systemService": "ActionGroup",
"addressPrefixes": [
"13.66.60.119/32",
"13.66.143.220/30",
"13.66.202.14/32",
"13.66.248.225/32",
"13.66.249.211/32",
"13.67.10.124/30",
"13.69.109.132/30",
"13.71.199.112/30",
"13.77.53.216/30",
"13.77.172.102/32",
"13.77.183.209/32",
"13.78.109.156/30",
"13.84.49.247/32",
"2603:1030:c06:400::978/125",
"2603:1030:f05:402::178/125",
"2603:1030:1005:402::178/125",
"2603:1040:5:402::178/125",
"2603:1040:207:402::178/125",
"2603:1040:407:402::178/125",
"2603:1040:606:402::178/125",
"2603:1040:806:402::178/125",
"2603:1040:904:402::178/125",
"2603:1040:a06:402::178/125",
"2603:1040:b04:402::178/125",
"2603:1040:c06:402::178/125",
"2603:1040:d04:800::f8/125",
"2603:1040:f05:402::178/125",
"2603:1040:1104:400::178/125",
"2603:1050:6:402::178/125",
"2603:1050:403:400::1f8/125"
],
"networkFeatures": [
"API",
"NSG",
"UDR",
"FW"
]
}
},
{
"name": "ApplicationInsightsAvailability",
"id": "ApplicationInsightsAvailability",
"properties": {
"changeNumber": 2,
"region": "",
"regionId": 0,
"platform": "Azure",
"systemService": "ApplicationInsightsAvailability",
"addressPrefixes": [
"13.86.97.224/27",
"13.86.98.0/27",
"13.86.98.48/28",
"13.86.98.64/28",
"20.37.156.64/27",
"20.37.192.80/29",
"20.38.80.80/28",
"20.40.104.96/27",
"20.40.104.128/27",
"20.40.124.176/28",
"20.40.124.240/28",
"20.40.125.80/28",
"20.40.129.32/27",
"20.40.129.64/26",
"20.40.129.128/27",
"20.42.4.64/27",
"20.42.35.32/28",
"20.42.35.64/26",
"20.42.35.128/28",
"20.42.129.32/27",
"20.43.40.80/28",
"20.43.64.80/29",
"20.43.128.96/29",
"20.45.5.160/27",
"20.45.5.192/26",
"20.189.106.64/29",
"23.100.224.16/28",
"23.100.224.32/27",
"23.100.224.64/26"
],
"networkFeatures": [
"API",
"NSG",
"UDR",
"FW"
]
}
},
{
"name": "AzureActiveDirectory",
"id": "AzureActiveDirectory",
"properties": {
"changeNumber": 8,
"region": "",
"regionId": 0,
"platform": "Azure",
"systemService": "AzureAD",
"addressPrefixes": [
"13.64.151.161/32",
"13.66.141.64/27",
"13.67.9.224/27",
"13.69.66.160/27",
"13.69.229.96/27",
"13.70.73.32/27"
],
"networkFeatures": [
"API",
"NSG",
"UDR",
"FW",
"VSE"
]
}
}
]
}
Thank you for your time.

import urllib
import json

testfile = urllib.URLopener()
testfile.retrieve("https://download.microsoft.com/download/7/1/D/71D86715-5596-4529-9B13-DA13A5DE5B63/ServiceTags_Public_20210426.json", "AzureIPs.json")
print("---SUCCESSFULLY RECEIVED MICROSOFT AZURE IPS---")

with open('AzureIPs.json', 'r') as f:
    data = json.load(f)
################# CHANGES AFTER THIS LINE #################
ips = []
values = data['values']
for block in values:
    ips.append(block['properties']['addressPrefixes'])
However, you will get a 2D list using this approach (one list of prefixes per block in values). If you need a flat 1D list instead, note that np.array(ips).flatten() will not work here: the blocks contain different numbers of prefixes, so NumPy would build a ragged object array (and names like 2DArray are not even valid Python identifiers). Use np.concatenate, or a plain list comprehension:
import numpy as np
flat_ips = np.concatenate(ips).tolist()
# or, without NumPy:
# flat_ips = [ip for block_ips in ips for ip in block_ips]
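To get the prefixes into a txt file, as the question asks, here is a minimal sketch that writes one prefix per line (the output name azure_ips.txt is just an example); it works on both Python 2 and 3:
# 'azure_ips.txt' is an arbitrary example file name.
with open('azure_ips.txt', 'w') as out:
    for prefix in flat_ips:
        out.write(prefix + '\n')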

json_normalize does not read all data

I have a json file that I want to flatten and retrieve all the information into a pandas dataframe. The json file looks like this:
jsonstr = {
"calculation": {
"id": "3k3k3k3kwk3kwk",
"Id": 23,
"submissionDate": 1622428064679,
"serverVersion": "3.3.5.6.r",
"tag": [
{
"code": "qq4059331155113278",
"manual": {
"location": {
"x": 26.5717,
"y": 59.4313,
"z": 0.0,
"floor": 0
},
"timestamp": 1599486138000
},
"device": null,
"measurements": [
{
"Address": "D_333",
"subcell": "",
"frequency": 14.0,
"dfId": 0
},
{
"trxAddress": "D_334",
"subcell": "",
"frequency": 11.0,
"dfId": 0
}]
}]
}
}
Now, as usual, I do the following. I thought that this would return all the "fields", including id, Id, submissionDate and so on:
import os, json
import pandas as pd
import numpy as np
import glob

pd.set_option('display.max_columns', None)

file = './Testjson.json'
#file = './jsondumps/ff80818178f93bd90179ab51781e1c95.json'

with open(file) as json_string:
    jsonstr = json.load(json_string)

labels = pd.json_normalize(jsonstr, record_path=['calculation','tag'])
But in fact, it returns:
code device \
0 qq4059331155113278 None
measurements manual.location.x \
0 [{'Address': 'D_333', 'subcell': '', 'frequenc... 26.5717
manual.location.y manual.location.z manual.location.floor \
0 59.4313 0.0 0
manual.timestamp
0 1599486138000
and trying the following
labels = pd.json_normalize(jsonstr, record_path=['calculation','tag'], meta=['id', 'Id'])
returns an error:
KeyError: 'id'
which makes sense. But what am I doing wrong to begin with? Why can I not get all the fields under calculation, since they are in the path?
Grateful for any insights!
Your syntax is slightly off on the meta argument: id and Id live on the parent calculation record, so they have to be passed as nested paths; they then appear as the last columns of the dataframe.
If you are looking to flatten the entire json, look into flatten_json (see the sketch after the output below). It's a pretty good library to use with nested json.
pd.json_normalize(jsonstr, record_path=['calculation','tag'], meta=[['calculation','id'],['calculation','Id']])
code device measurements manual.location.x manual.location.y manual.location.z manual.location.floor manual.timestamp calculation.id calculation.Id
0 qq4059331155113278 null [{'Address': 'D_333', 'subcell': '', 'frequenc... 26.5717 59.4313 0.0 0 1599486138000 3k3k3k3kwk3kwk 23
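For reference, a minimal sketch of the flatten_json approach mentioned above, assuming the third-party flatten_json package is installed (pip install flatten_json); its flatten function collapses nested keys into underscore-joined names:
from flatten_json import flatten
import pandas as pd

# flatten() turns e.g. jsonstr['calculation']['id'] into the key 'calculation_id'
flat = flatten(jsonstr)
df = pd.DataFrame([flat])
print(df.columns.tolist())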

How to check JSON data in Python

Data sample (in Python):
import pandas as pd
patients_df = pd.read_json('C:/MyWorks/Python/Anal/data_sample.json', orient="records", lines=True)
patients_df.head()
My JSON data sample:
"data1": {
"id": "myid",
"seatbid": [
{
"bid": [
{
"id": "myid",
"impid": "1",
"price": 0.46328014,
"adm": "adminfo",
"adomain": [
"domain.com"
],
"iurl": "url.com",
"cid": "111",
"crid": "1111",
"cat": [
"CAT-0101"
],
"w": 00,
"h": 00
}
],
"seat": "27"
}
],
"cur": "USD"
},
What I want to do is check whether there is a "cat" value in my very large JSON data.
The "cat" value may or may not exist, but I'm trying to use Python pandas to check for it.
for seatbid in patients_df["win_res"]:
    for bid in seatbid["seatbid"]:
I tried to access the JSON data with a loop like that, but it's not being accessed properly.
I simply want to check whether "cat" exists or not.
You can use Python's json library as follows:
import json

patient_data = json.loads(patient_json)  # patient_json is your raw JSON string

if "cat" in patient_data:
    print("Key exists in JSON data")
else:
    print("Key doesn't exist in JSON data")

Combine multiple JSON files, and parse into CSV

I have about 100 JSON files, all titled with different dates, and I need to merge them into one CSV file that has the headers "date", "real_name", and "text".
There are no dates listed in the JSON itself, and real_name is nested. I haven't worked with JSON in a while and am a little lost.
The basic structure of the JSON looks more or less like this:
Filename: 2021-01-18.json
[
{
"client_msg_id": "xxxx",
"type": "message",
"text": "THIS IS THE TEXT I WANT TO PULL",
"user": "XXX",
"user_profile": {
"first_name": "XXX",
"real_name": "THIS IS THE NAME I WANT TO PULL",
"display_name": "XXX",
"is_restricted": false,
"is_ultra_restricted": false
},
"blocks": [
{
"type": "rich_text",
"block_id": "yf=A9",
}
]
}
]
So far I have
import glob

read_files = glob.glob("*.json")
output_list = []
all_items = []

for f in read_files:
    with open(f, "rb") as infile:
        output_list.append(json.load(infile))
        data = {}
        for obj in output_list[]
            data['date'] = f
            data['text'] = 'text'
            data['real_name'] = 'real_name'
            all_items.append(data)
Once you've read the JSON object, just index into the dictionaries for the data. You might need obj[0]['text'], etc., if your JSON data is really in a list in each file, but that seems odd and I'm assuming your data was pasted from output_list after you'd collected the data. So assuming your file content is exactly like below:
{
"client_msg_id": "xxxx",
"type": "message",
"text": "THIS IS THE TEXT I WANT TO PULL",
"user": "XXX",
"user_profile": {
"first_name": "XXX",
"real_name": "THIS IS THE NAME I WANT TO PULL",
"display_name": "XXX",
"is_restricted": false,
"is_ultra_restricted": false
},
"blocks": [
{
"type": "rich_text",
"block_id": "yf=A9",
}
]
}
test.py:
import json
import glob
from pathlib import Path

read_files = glob.glob("*.json")
all_items = []

for f in read_files:
    with open(f, "rb") as infile:
        obj = json.load(infile)
    data = {}                                   # build one fresh row per file
    data['date'] = Path(f).stem                 # the file name (the date) without ".json"
    data['text'] = obj['text']
    data['real_name'] = obj['user_profile']['real_name']
    all_items.append(data)

print(all_items)
Output:
[{'date': '2021-01-18', 'text': 'THIS IS THE TEXT I WANT TO PULL', 'real_name': 'THIS IS THE NAME I WANT TO PULL'}]
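To produce the CSV the question asks for, a minimal sketch using the standard csv module (the output name merged.csv is just an example):
import csv

with open('merged.csv', 'w', newline='') as out:
    writer = csv.DictWriter(out, fieldnames=['date', 'real_name', 'text'])
    writer.writeheader()
    writer.writerows(all_items)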

JSON metadata file: writing nested floats to .txt or .csv

I am very new to Python, so please forgive me if this is a silly question, but I have been attempting to loop over a set of .json files and extract certain information (specifically the date and one value in particular) in order to create a time series. Since I have over 300 files, I would like this to be done automatically. I have managed to print the data, but I have failed to extract this information to a text file that would be readable in something like Excel.
Please find attached both an example .json file I am trying to extract from and my code so far. Thanks!
{
"AcquasitionInfo": {
"Date": {
"Day": 27,
"Month": 3,
"Year": 2011
},
"EarthSunDistance": 0.9977766,
"SolarAzimuth": 154.94013617,
"SolarZenith": 53.1387049,
"Time": {
"Hour": 11,
"Minute": 0,
"Second": 21
},
"sensorAzimuth": 0.0,
"sensorZenith": 0.0
},
"FileInfo": {
"CLOUD_MASK": "LS5TM_20110327_lat53lon354_r23p204_clouds.kea",
"FileBaseName": "LS5TM_20110327_lat53lon354_r23p204",
"IMAGE_DEM": "LS5TM_20110327_lat53lon354_r23p204_dem.kea",
"METADATA": "LS5TM_20110327_lat53lon354_r23p204_meta.json",
"ProviderMetadata": "LT05_L1TP_204023_20110327_20161208_01_T1_MTL.txt",
"RADIANCE": "LS5TM_20110327_lat53lon354_r23p204_vmsk_mclds_rad.kea",
"RADIANCE_WHOLE": "LS5TM_20110327_lat53lon354_r23p204_vmsk_rad.kea",
"SREF_6S_IMG": "LS5TM_20110327_lat53lon354_r23p204_vmsk_mclds_topshad_rad_srefdem.kea",
"STD_SREF_IMG": "LS5TM_20110327_lat53lon354_r23p204_vmsk_mclds_topshad_rad_srefdem_stdsref.kea",
"THERMAL_BRIGHT": "LS5TM_20110327_lat53lon354_r23p204_vmsk_thrad_thermbright.kea",
"THERMAL_BRIGHT_WHOLE": "LS5TM_20110327_lat53lon354_r23p204_vmsk_thrad_thermbright.kea",
"THERM_RADIANCE_WHOLE": "LS5TM_20110327_lat53lon354_r23p204_vmsk_thermrad.kea",
"TOA": "LS5TM_20110327_lat53lon354_r23p204_vmsk_mclds_rad_toa.kea",
"TOA_WHOLE": "LS5TM_20110327_lat53lon354_r23p204_vmsk_rad_toa.kea",
"TOPO_SHADOW_MASK": "LS5TM_20110327_lat53lon354_r23p204_toposhad.kea",
"VALID_MASK": "LS5TM_20110327_lat53lon354_r23p204_valid.kea",
"VIEW_ANGLE": "LS5TM_20110327_lat53lon354_r23p204_viewangle.kea"
},
"ImageInfo": {
"CellSizeRefl": 30.0,
"CellSizeTherm": 30.0,
"CloudCover": 52.0,
"CloudCoverLand": 79.0
},
"LocationInfo": {
"Geographical": {
"BBOX": {
"BLLat": 52.06993,
"BLLon": -5.34028,
"BRLat": 52.08621,
"BRLon": -1.72003,
"TLLat": 54.09075,
"TLLon": -5.45257,
"TRLat": 54.10827,
"TRLon": -1.65856
},
"CentreLat": 53.10330325240661,
"CentreLon": -3.5429440927905724
},
"Projected": {
"BBOX": {
"BLX": 354735.0,
"BLY": 5776815.0,
"BRX": 572985.0,
"BRY": 5776815.0,
"TLX": 354735.0,
"TLY": 5992035.0,
"TRX": 572985.0,
"TRY": 5992035.0
},
"CentreX": 463860.0,
"CentreY": 5884425.0,
"VPOLY": {
"MaxXX": 572985.0,
"MaxXY": 5950185.0,
"MaxYX": 405795.0,
"MaxYY": 5992035.0,
"MinXX": 354735.0,
"MinXY": 5819025.0,
"MinYX": 521775.0,
"MinYY": 5776815.0
}
}
},
"ProductsInfo": {
"ARCSIProducts": [
"CLOUDS",
"DOSAOTSGL",
"STDSREF",
"METADATA"
],
"ARCSI_AOT_RANGE_MAX": 0.5,
"ARCSI_AOT_RANGE_MIN": 0.05,
"ARCSI_AOT_VALUE": 0.5,
"ARCSI_CLOUD_COVER": 0.627807080745697,
"ARCSI_LUT_ELEVATION_MAX": 1100,
"ARCSI_LUT_ELEVATION_MIN": -100,
"ProcessDate": {
"Day": 11,
"Month": 7,
"Year": 2018
},
"ProcessTime": {
"Hour": 7,
"Minute": 24,
"Second": 55
}
},
"SensorInfo": {
"ARCSISensorName": "LS5TM",
"Path": 204,
"Row": 23,
"SensorID": "TM",
"SpacecraftID": "LANDSAT_5"
},
"SoftwareInfo": {
"Name": "ARCSI",
"URL": "http://www.rsgislib.org/arcsi",
"Version": "3.1.4"
} }
import glob
import json

jsonfile = glob.glob('*.json')

with open(jsonfile[0]) as f:
    data = json.load(f)

print(data["AcquasitionInfo"]["Date"]["Day"])
print(data["AcquasitionInfo"]["Date"]["Month"])
print(data["AcquasitionInfo"]["Date"]["Year"])
print(data["ProductsInfo"]["ARCSI_AOT_VALUE"])

with open('data.txt', 'w') as outfile:
    json.dump(["ProductsInfo"]["ARCSI_AOT_VALUE"], outfile)
You forgot data in the last line:
import glob
import json

jsonfile = glob.glob('*.json')

with open(jsonfile[0]) as f:
    data = json.load(f)

print(data["AcquasitionInfo"]["Date"]["Day"])
print(data["AcquasitionInfo"]["Date"]["Month"])
print(data["AcquasitionInfo"]["Date"]["Year"])
print(data["ProductsInfo"]["ARCSI_AOT_VALUE"])

with open('data.txt', 'w') as outfile:
    json.dump(data["ProductsInfo"]["ARCSI_AOT_VALUE"], outfile)
EDIT:
To process every .json file in the directory, you can do it like this:
import json
import os

for file in os.listdir("."):
    if file.endswith(".json"):
        with open(file) as f:
            data = json.load(f)
        with open('data.txt', 'a') as outfile:
            json.dump(data["ProductsInfo"]["ARCSI_AOT_VALUE"], outfile)
            outfile.write(';')
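Since the goal is a time series readable in Excel, a CSV with one date/value row per file may be more useful than a ';'-separated dump. A minimal sketch under the same assumptions (every file carries the AcquasitionInfo and ProductsInfo keys shown above; timeseries.csv is just an example name):
import csv
import glob
import json

rows = []
for path in glob.glob('*.json'):
    with open(path) as f:
        data = json.load(f)
    date = data["AcquasitionInfo"]["Date"]
    rows.append({
        'date': '{:04d}-{:02d}-{:02d}'.format(date["Year"], date["Month"], date["Day"]),
        'aot': data["ProductsInfo"]["ARCSI_AOT_VALUE"],
    })

rows.sort(key=lambda r: r['date'])  # chronological order for the time series

with open('timeseries.csv', 'w', newline='') as out:
    writer = csv.DictWriter(out, fieldnames=['date', 'aot'])
    writer.writeheader()
    writer.writerows(rows)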

Build a dynamic JSON object from MongoDB, create a .json file and save the object to it

Let's say we have our MongoDB and we want to back up the data into a .json file. An example output file, database.json, would look like this:
{
"collections": [
{"name": "admin"},
{"name": "class"},
{"name": "lesson"},
{"name": "message"},
{"name": "room"},
{"name": "student"},
{"name": "subject"},
{"name": "teacher"}
],
"subjects": [
{
"name": "Null",
"color": "#FFFFFF"
},
{
"name": "Design Art",
"color": "#82B9D6"
},
{
"name": "Plastic Art",
"color": "#a3db05"
},
{
"name": "Media And Production",
"color": "#522a64"
}, // ... the file continues like this ...
}
Each collection should be added to "collections", and for each collection there should be an array of all the documents inside it (like above).
I'm using Python 3.4 with the pymongo driver.
What is the best way to get all the info from the DB, create the JSON object, and write it to a new .json file?
I found this way of doing it:
import json
from pymongo import MongoClient
from pprint import pprint

def main():
    with open('../config/database.json') as database_config:
        config = json.load(database_config)
    client = MongoClient(config["mongodb"])
    db = client[config["database"]]
    data = dict()
    data["collections"] = db.collection_names()
    for collection_name in db.collection_names():
        data[collection_name] = get_collection(db, collection_name)
    pprint(data)
    insert_data_to_file(data)

def get_collection(db, collection_name):
    collection_list = []
    collection = db[collection_name]
    cursor = collection.find({})
    for document in cursor:
        _id = document.pop('_id')
        document['_id'] = str(_id)  # ObjectId is not JSON serializable, so store it as a string
        collection_list.append(document)
    return collection_list

def insert_data_to_file(data):
    with open('database.json', 'x') as database:
        json.dump(data, database, sort_keys=True)

main()
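One caveat with json.dump here: documents can contain other non-JSON-serializable BSON types besides ObjectId (datetime, for example). PyMongo ships bson.json_util for exactly this case, so a variant of insert_data_to_file could delegate the conversion to it (a sketch, assuming a standard pymongo installation):
from bson import json_util

def insert_data_to_file(data):
    # json_util.dumps knows how to serialize ObjectId, datetime and other BSON types,
    # and forwards keyword arguments such as sort_keys to json.dumps.
    with open('database.json', 'x') as database:
        database.write(json_util.dumps(data, sort_keys=True))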