Extract a particular part of a JSON string using Python regex - json

I have the below JSON string:
"{"sweep_enabled":true,"product":"XYZ","page":"XYZ Profile","list":" {\"id\":205782,\"name\":\"Robert Shriwas\",\"gender\":\"F\",\"practicing_since\":null,\"years\":21,\"specializations\":[\"Mentor\"]}","form":{"q":"","city":"Delhi","locality":null},"cerebro":true}"
I want to extract the list part out of the above string:
{\"id\":205782,\"name\":\"Robert Shriwas\",\"gender\":\"F\",\"practicing_since\":null,\"years\":21,\"specializations\":[\"Mentor\"]}
How can I do this using python regex?

There is a problem with your JSON: it wraps the inner JSON object in double quotes, which causes json.loads to fail. Try transforming the string before passing it to json.loads. Without those extra quotes, the following works perfectly:
>>> p = json.loads('''{"sweep_enabled":true,"product":"XYZ","page":"XYZ Profile","list":{\"id\":205782,\"name\":\"Robert Shriwas\",\"gender\":\"F\",\"practicing_since\":null,\"years\":21,\"specializations\":[\"Mentor\"]},"form":{"q":"","city":"Delhi","locality":null},"cerebro":true}''')
You can then extract the required part with:
>>> p["list"]
{u'name': u'Robert Shriwas', u'gender': u'F', u'specializations': [u'Mentor'], u'id': 205782, u'years': 21, u'practicing_since': None}
Check this out: I managed to correct the JSON you provided.
>>> p = '''{"sweep_enabled":true,"product":"XYZ","page":"XYZ Profile","list":" {\"id\":205782,\"name\":\"Robert Shriwas\",\"gender\":\"F\",\"practicing_since\":null,\"years\":21,\"specializations\":[\"Mentor\"]}","form":{"q":"","city":"Delhi","locality":null},"cerebro":true}'''
>>> q = re.sub(r'(:)\s*"\s*(\{[^\}]+\})\s*"',r'\1\2', p[1:-1])
>>> q
'"sweep_enabled":true,"product":"XYZ","page":"XYZ Profile","list":{"id":205782,"name":"Robert Shriwas","gender":"F","practicing_since":null,"years":21,"specializations":["Mentor"]},"form":{"q":"","city":"Delhi","locality":null},"cerebro":true'
>>> r = p[0] + q + p[-1]
>>> r
'{"sweep_enabled":true,"product":"XYZ","page":"XYZ Profile","list":{"id":205782,"name":"Robert Shriwas","gender":"F","practicing_since":null,"years":21,"specializations":["Mentor"]},"form":{"q":"","city":"Delhi","locality":null},"cerebro":true}'
>>> json.loads(r)
{u'product': u'XYZ', u'form': {u'q': u'', u'city': u'Delhi', u'locality': None}, u'sweep_enabled': True, u'list': {u'name': u'Robert Shriwas', u'gender': u'F', u'specializations': [u'Mentor'], u'id': 205782, u'years': 21, u'practicing_since': None}, u'cerebro': True, u'page': u'XYZ Profile'}
>>> s = json.loads(r)
>>> s['list']
{u'name': u'Robert Shriwas', u'gender': u'F', u'specializations': [u'Mentor'], u'id': 205782, u'years': 21, u'practicing_since': None}
>>>
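As an aside (a sketch of my own, not part of the answer above): if the backslash-escaped quotes shown in the question are literally present in your data, the outer string is already valid JSON whose "list" value is itself a JSON document stored inside a string, so you can simply decode twice and skip the regex entirely:

import json

raw = r'{"sweep_enabled":true,"product":"XYZ","page":"XYZ Profile","list":" {\"id\":205782,\"name\":\"Robert Shriwas\",\"gender\":\"F\",\"practicing_since\":null,\"years\":21,\"specializations\":[\"Mentor\"]}","form":{"q":"","city":"Delhi","locality":null},"cerebro":true}'

outer = json.loads(raw)            # first pass: "list" comes back as a plain string
inner = json.loads(outer['list'])  # second pass: decode the embedded JSON document
print(inner['name'])               # Robert Shriwas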

Related

How to stop DjangoJSONEncoder from truncating microseconds datetime objects?

I have a dictionary with a datetime object inside it and when I try to json dump it, Django truncates the microseconds:
> dikt
{'date': datetime.datetime(2020, 6, 22, 11, 36, 25, 763835, tzinfo=<DstTzInfo 'Africa/Nairobi' EAT+3:00:00 STD>)}
> json.dumps(dikt, cls=DjangoJSONEncoder)
'{"date": "2020-06-22T11:36:25.763+03:00"}'
How can I preserve all 6 microsecond digits?
DjangoJSONEncoder follows the ECMA-262 specification, which only keeps millisecond precision for datetimes.
You can easily overcome this by introducing your own custom encoder:
import datetime
import json

from django.core.serializers.json import DjangoJSONEncoder

class MyCustomEncoder(DjangoJSONEncoder):
    def default(self, obj):
        if isinstance(obj, datetime.datetime):
            r = obj.isoformat()
            if r.endswith('+00:00'):
                r = r[:-6] + 'Z'
            return r
        return super(MyCustomEncoder, self).default(obj)

datetime_object = datetime.datetime.now()
print(datetime_object)
print(json.dumps(datetime_object, cls=MyCustomEncoder))
Output:
2020-06-22 11:54:29.127120
"2020-06-22T11:54:29.127120"

Unable to resolve TypeError: Object of type 'map' is not JSON serializable

I get an error while converting a map object to a string using json.dumps in Python 3.6:
x = {'id_str': '639035115457388544', 'video': False, 'photo': False, 'link': True, 'hashtags': <map object at 0x7f1762ab9320>, 'coordinates': None, 'timestamp_ms': 1441218018000, 'text': 'Police suspected hit-and-run', 'user': {'id': 628694263, 'name': 'Beth LeBlanc', 'friends_count': 235, 'verified': False, 'followers_count': 654, 'created_at': 1341631106000, 'time_zone': None, 'statuses_count': 3966, 'protected': 3966}, 'mentions': [], 'screen_name': 'THBethLeBlanc', 'reply': None, 'tweet_type': 'Tweet', 'mentionedurl': None, 'possibly_sensitive': False, 'placename': '', 'sentiments': 'Undefined'}
print(json.dumps(x))
TypeError: Object of type 'map' is not JSON serializable
I don't know how you get the value for 'hashtags', but the example below should help you a bit: wrap your map object in list().
>>> import json
>>>
>>> some_map_value = map([],[])
>>> some_map_value
<map object at 0x7f380a75a850>
>>>
>>> x = {'hashtags': some_map_value}
>>> x
{'hashtags': <map object at 0x7f380a75a850>}
>>>
>>> json.dumps(x)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/usr/lib/python3.7/json/__init__.py", line 231, in dumps
return _default_encoder.encode(obj)
File "/usr/lib/python3.7/json/encoder.py", line 199, in encode
chunks = self.iterencode(o, _one_shot=True)
File "/usr/lib/python3.7/json/encoder.py", line 257, in iterencode
return _iterencode(o, 0)
File "/usr/lib/python3.7/json/encoder.py", line 179, in default
raise TypeError(f'Object of type {o.__class__.__name__} '
TypeError: Object of type map is not JSON serializable
>>>
>>> list(some_map_value)
[]
>>> x = {'hashtags': list(some_map_value)} # surround your map object with list
>>> json.dumps(x)
'{"hashtags": []}'
For more information see Getting a map() to return a list in Python 3.x. If this is not what you are looking for, please leave a comment on this answer.
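As an aside (a sketch of my own, not part of the original answer): if you cannot easily change every call site, you could instead let the encoder convert stray map objects (or other iterators) to lists at dump time:

import json

class IterEncoder(json.JSONEncoder):
    # Fallback for objects the standard encoder cannot handle: try to turn
    # them into a list (covers map, filter, generators, sets, ...).
    def default(self, obj):
        try:
            return list(obj)
        except TypeError:
            return super(IterEncoder, self).default(obj)

x = {'hashtags': map(str.lower, ['A', 'B'])}
print(json.dumps(x, cls=IterEncoder))   # {"hashtags": ["a", "b"]}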
Update: I just checked your comment. Wrap your map(lambda x: x['text'], doc['entities']['hashtags']) call in list(), i.e. list(map(lambda x: x['text'], doc['entities']['hashtags'])):
if doc['entities'].get('media'):
    tweet['photo'] = True
if doc.get('extended_entities'):
    tweet[doc['extended_entities']['media'][0]['type']] = True
    tweet['mediaurl'] = doc['extended_entities']['media'][0]['media_url']
if doc['entities'].get('urls'):
    tweet['link'] = True
tweet['hashtags'] = list(map(lambda x: x['text'], doc['entities']['hashtags']))
tweet['coordinates'] = doc['coordinates']
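An equivalent alternative (assuming doc['entities']['hashtags'] is a list of dicts with a 'text' key, as in your lambda) is a list comprehension, which never creates a map object in the first place:

tweet['hashtags'] = [h['text'] for h in doc['entities']['hashtags']]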
There is an error in the x you posted: the hashtags key has no usable value, since <map object at ...> is not valid Python when pasted back in. Here it is fixed:
https://repl.it/repls/SubtleLovableSystemadministrator

Read json file using python

Team, I have my code working with the dict shown in the comment below. Now I want to move that data to file.json and read it from there, keeping the logic the same. Any hints on how I can represent the commented-out section below in JSON and read it back into a dictionary? (In other words: convert a Python dictionary to a JSON file and read it back in Python.)
import json

'''
source_cidr_name = {
    'location1' : ("1.1.1.1/32", [22, 443]),
    'location2' : ("2.2.2.2/32", [443])}
'''

source_cidr_name = {}
with open('/Users/code/dev/whitelist.json') as jf:
    source_cidr_name = json.load(jf)
My file.json looks like the below, but I don't know how to represent the ports 22 and 443:
{
"source_cidr_whitelist": {"key1": {"ip_subnet": "1.1.1.1/32"}, "key2": {"ip_subnet": "2.2.2.2/32"}}}
JSON (i.e. JavaScript Object Notation) does not allow single quotes ' around strings or parentheses ( ) around lists/arrays, so json.loads() fails on them when converting a string representation to an object (dictionary).
json.dumps(), on the other hand, is fine with tuples/lists and single/double quotes, because Python converts them internally to proper, valid JSON: the ( ) surrounding a tuple become [ ], and the ' surrounding a string becomes ".
My suggestion to avoid this kind of problem:
When dumping/writing data to a JSON file, use json.dumps() to create the string from an existing JSON-convertible Python object such as a list or dictionary.
If you read the same data back later, it converts easily to the real Python object (list/dictionary etc.) with json.loads() for strings or json.load() for file-like objects.
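For instance, here is a minimal round-trip sketch (my own addition; the file path and structure mirror the question):

import json

source_cidr_name = {
    'location1': ("1.1.1.1/32", [22, 443]),
    'location2': ("2.2.2.2/32", [443]),
}

# Writing: tuples are converted to JSON arrays automatically.
with open('/Users/code/dev/whitelist.json', 'w') as jf:
    json.dump(source_cidr_name, jf, indent=4)

# Reading: everything comes back as lists, e.g. ["1.1.1.1/32", [22, 443]].
with open('/Users/code/dev/whitelist.json') as jf:
    source_cidr_name = json.load(jf)

print(source_cidr_name['location1'][1])   # [22, 443]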
And here is the interpreter session that helped me figure this out:
>>> import json
>>>
>>> d = {
... 'location1' : ("1.1.1.1/32", [22, 443]),
... 'location2' : ("2.2.2.2/32", [443])}
>>>
>>> s = json.dumps(d, indent=4)
>>> print(s)
{
"location2": [
"2.2.2.2/32",
[
443
]
],
"location1": [
"1.1.1.1/32",
[
22,
443
]
]
}
>>>
>>> new_d = json.loads(s)
>>>
>>> new_d
{u'location2': [u'2.2.2.2/32', [443]], u'location1': [u'1.1.1.1/32', [22, 443]]}
>>>
>>> new_d['location2']
[u'2.2.2.2/32', [443]]
>>>
>>> new_d['location1']
[u'1.1.1.1/32', [22, 443]]
>>>
>>> new_d['location1'][0]
u'1.1.1.1/32'
>>> new_d['location1'][1]
[22, 443]
>>>
>>> new_d['location1'][1][0]
22
>>> new_d['location1'][1][1]
443
>>>
>>> # NEW (start from string)
...
>>> s = '''{
... 'location1' : ("1.1.1.1/32", [22, 443]),
... 'location2' : ("2.2.2.2/32", [443])}'''
>>> print(s)
{
'location1' : ("1.1.1.1/32", [22, 443]),
'location2' : ("2.2.2.2/32", [443])}
>>>
1st try
>>> s = '''{'location1' : ["1.1.1.1/32", [22, 443]],'location2' : ["2.2.2.2/32", [443]]}'''
>>> s
'{\'location1\' : ["1.1.1.1/32", [22, 443]],\'location2\' : ["2.2.2.2/32", [443]]}'
>>>
>>> print(s)
{'location1' : ["1.1.1.1/32", [22, 443]],'location2' : ["2.2.2.2/32", [443]]}
>>>
>>> d = json.loads(s)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/usr/local/Cellar/python#2/2.7.15_2/Frameworks/Python.framework/Versions/2.7/lib/python2.7/json/__init__.py", line 339, in loads
return _default_decoder.decode(s)
File "/usr/local/Cellar/python#2/2.7.15_2/Frameworks/Python.framework/Versions/2.7/lib/python2.7/json/decoder.py", line 364, in decode
obj, end = self.raw_decode(s, idx=_w(s, 0).end())
File "/usr/local/Cellar/python#2/2.7.15_2/Frameworks/Python.framework/Versions/2.7/lib/python2.7/json/decoder.py", line 380, in raw_decode
obj, end = self.scan_once(s, idx)
ValueError: Expecting property name: line 1 column 2 (char 1)
>>>
2nd try
>>>
>>> s = '''{
... "location1" : ("1.1.1.1/32", [22, 443]),
... "location2" : ("2.2.2.2/32", [443])}'''
>>>
>>> d = json.loads(s)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/usr/local/Cellar/python#2/2.7.15_2/Frameworks/Python.framework/Versions/2.7/lib/python2.7/json/__init__.py", line 339, in loads
return _default_decoder.decode(s)
File "/usr/local/Cellar/python#2/2.7.15_2/Frameworks/Python.framework/Versions/2.7/lib/python2.7/json/decoder.py", line 364, in decode
obj, end = self.raw_decode(s, idx=_w(s, 0).end())
File "/usr/local/Cellar/python#2/2.7.15_2/Frameworks/Python.framework/Versions/2.7/lib/python2.7/json/decoder.py", line 382, in raw_decode
raise ValueError("No JSON object could be decoded")
ValueError: No JSON object could be decoded
>>>
>>>
Finally
>>> s = '''{
... "location1" : ["1.1.1.1/32", [22, 443]],
... "location2" : ["2.2.2.2/32", [443]]}'''
>>>
>>> d = json.loads(s)
>>> d
{u'location2': [u'2.2.2.2/32', [443]], u'location1': [u'1.1.1.1/32', [22, 443]]}
>>>
>>> type(d)
<type 'dict'>
>>>
>>> d['location2']
[u'2.2.2.2/32', [443]]
>>>
>>> d['location2'][1][0]
443
>>>

Unable to make dns-over-https with cloudflare and python requests

I'm trying to write a quick script that could do dns lookups using the new 1.1.1.1 DNS over HTTPS public DNS server from CloudFlare.
Looking at their docs here https://developers.cloudflare.com/1.1.1.1/dns-over-https/json-format/ I'm not sure what I'm doing wrong and why I'm getting a 415 status code (415 Unsupported content type).
Here is my script:
#!/usr/bin/env python
import requests
import json
from pprint import pprint
url = 'https://cloudflare-dns.com/dns-query'
client = requests.session()
json1 = {'name': 'example.com','type': 'A'}
ae = client.get(url, headers = {'Content-Type':'application/dns-json'}, json = json1)
print ae.raise_for_status()
print ae.status_code
print ae.json()
client.close()
Here is the output:
raise HTTPError(http_error_msg, response=self)
requests.exceptions.HTTPError: 415 Client Error: Unsupported Media Type for url: https://cloudflare-dns.com/dns-query
and for the json response (expected I believe):
raise ValueError("No JSON object could be decoded")
ValueError: No JSON object could be decoded
Using curl this works perfectly fine.
Many thanks
You should not send a JSON request body at all; only the response uses JSON.
Put the application/dns-json value in a ct parameter:
JSON formatted queries are sent using a GET request. When making requests using GET, the DNS query is encoded into the URL. An additional URL parameter of ‘ct’ should indicate the MIME type (application/dns-json).
A GET request should not carry a body, so don't try to send JSON with it:
params = {
    'name': 'example.com',
    'type': 'A',
    'ct': 'application/dns-json',
}
ae = client.get(url, params=params)
Demo:
>>> import requests
>>> url = 'https://cloudflare-dns.com/dns-query'
>>> client = requests.session()
>>> params = {
... 'name': 'example.com',
... 'type': 'A',
... 'ct': 'application/dns-json',
... }
>>> ae = client.get(url, params=params)
>>> ae.status_code
200
>>> from pprint import pprint
>>> pprint(ae.json())
{'AD': True,
 'Answer': [{'TTL': 2560,
             'data': '93.184.216.34',
             'name': 'example.com.',
             'type': 1}],
 'CD': False,
 'Question': [{'name': 'example.com.', 'type': 1}],
 'RA': True,
 'RD': True,
 'Status': 0,
 'TC': False}

Using JSON keys as attributes in nested JSON

I'm working with nested JSON-like data structures in Python 2.7 that I exchange with some foreign Perl code. I just want to 'work with' these nested structures of lists and dictionaries in a more pythonic way.
So if I have a structure like this...
a = {
    'x': 4,
    'y': [2, 3, { 'a': 55, 'b': 66 }],
}
...I want to be able to deal with it in a Python script as if it were nested Python classes/structs, like this:
>>> aa = j2p(a) # <<- this is what I'm after.
>>> print aa.x
4
>>> aa.z = 99
>>> print a
{
    'x': 4,
    'y': [2, 3, { 'a': 55, 'b': 66 }],
    'z': 99
}
>>> aa.y[2].b = 999
>>> print a
{
    'x': 4,
    'y': [2, 3, { 'a': 55, 'b': 999 }],
    'z': 99
}
Thus aa is a proxy into the original structure. This is what I came up with so far, inspired by the excellent What is a metaclass in Python? question.
def j2p(x):
    """j2p creates a pythonic interface to nested arrays and
    dictionaries, as returned by json readers.

    >>> a = { 'x':[5,8], 'y':5}
    >>> aa = j2p(a)
    >>> aa.y=7
    >>> print a
    {'x': [5, 8], 'y':7}
    >>> aa.x[1]=99
    >>> print a
    {'x': [5, 99], 'y':7}
    >>> aa.x[0] = {'g':5, 'h':9}
    >>> print a
    {'x': [ {'g':5, 'h':9} , 99], 'y':7}
    >>> print aa.x[0].g
    5
    """
    if isinstance(x, list):
        return _list_proxy(x)
    elif isinstance(x, dict):
        return _dict_proxy(x)
    else:
        return x

class _list_proxy(object):
    def __init__(self, proxied_list):
        object.__setattr__(self, 'data', proxied_list)

    def __getitem__(self, a):
        return j2p(object.__getattribute__(self, 'data').__getitem__(a))

    def __setitem__(self, a, v):
        return object.__getattribute__(self, 'data').__setitem__(a, v)

class _dict_proxy(_list_proxy):
    def __init__(self, proxied_dict):
        _list_proxy.__init__(self, proxied_dict)

    def __getattribute__(self, a):
        return j2p(object.__getattribute__(self, 'data').__getitem__(a))

    def __setattr__(self, a, v):
        return object.__getattribute__(self, 'data').__setitem__(a, v)

def p2j(x):
    """p2j gives back the underlying json-ic nested
    dictionary/list structure of an object or attribute created with
    j2p.
    """
    if isinstance(x, (_list_proxy, _dict_proxy)):
        return object.__getattribute__(x, 'data')
    else:
        return x
Now I wonder whether there is an elegant way of mapping a whole set of the __*__ special functions, like __iter__ and __delitem__, so I don't need to unwrap things using p2j() just to iterate or do other pythonic stuff.
# today:
for i in p2j(aa.y):
    print i
# would like to...
for i in aa.y:
    print i
I think you're making this more complex than it needs to be. If I understand you correctly, all you should need to do is this:
import json

class Struct(dict):
    def __getattr__(self, name):
        return self[name]

    def __setattr__(self, name, value):
        self[name] = value

    def __delattr__(self, name):
        del self[name]

j = '{"y": [2, 3, {"a": 55, "b": 66}], "x": 4}'
aa = json.loads(j, object_hook=Struct)

for i in aa.y:
    print(i)
When you load JSON, the object_hook parameter lets you specify a callable that processes each object the decoder loads. I've just used it to turn each dict into an object that allows attribute access to its keys (see the json.loads documentation for object_hook).
There is an attrdict library that does exactly that in a very safe manner, but if you want, a quick and dirty (possibly leaking memory) approach was given in this answer:
class AttrDict(dict):
    def __init__(self, *args, **kwargs):
        super(AttrDict, self).__init__(*args, **kwargs)
        self.__dict__ = self

j = '{"y": [2, 3, {"a": 55, "b": 66}], "x": 4}'
aa = json.loads(j, object_hook=AttrDict)
I found the answer: there is intentionally no way to automatically map the special methods in Python using __getattribute__, because special methods are looked up on the type rather than the instance. So to achieve what I want, I need to explicitly define all the special methods, like __len__, one after the other.
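For instance, here is a rough sketch (my own addition, not from the original post) of what spelling them out could look like, reusing j2p from the code above:

class _list_proxy(object):
    def __init__(self, proxied_list):
        object.__setattr__(self, 'data', proxied_list)

    def __getitem__(self, a):
        return j2p(object.__getattribute__(self, 'data')[a])

    def __setitem__(self, a, v):
        object.__getattribute__(self, 'data')[a] = v

    # Special methods must be written out one by one: Python looks them up
    # on the type and bypasses __getattribute__, so they cannot be proxied.
    def __len__(self):
        return len(object.__getattribute__(self, 'data'))

    def __iter__(self):
        for item in object.__getattribute__(self, 'data'):
            yield j2p(item)

    def __delitem__(self, a):
        del object.__getattribute__(self, 'data')[a]

With these defined (and inherited by _dict_proxy), for i in aa.y iterates directly, without unwrapping via p2j().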