How to save twitterscraper output as json file - json

I read the documentation, but the documentation only mentions saving output as .txt file. I tried to modify the code to save output as JSON.
save as .txt:
from twitterscraper import query_tweets

if __name__ == '__main__':
    # Run the query once and reuse the result for both printing and saving.
    list_of_tweets = query_tweets("Trump OR Clinton", 10)

    # Print the retrieved tweets to the screen:
    for tweet in list_of_tweets:
        print(tweet)

    # Or save the retrieved tweets to file (one tweet per line). The context
    # manager closes the file even if a write fails.
    with open("output.txt", "w", encoding="utf-8") as out_file:
        for tweet in list_of_tweets:
            # NOTE(review): assumes each Tweet exposes its body as `.text`
            # (the __dict__ dump in the answer lists a 'text' attribute) --
            # confirm against the installed twitterscraper version.
            out_file.write(tweet.text + "\n")
I tried to modify this to save as JSON:
# Fetch the tweets; per the error quoted below, each item is a Tweet object.
output = query_tweets("Trump OR Clinton", 10)
jsonfile = open("tweets.json","w")
for tweet in output:
# This call raises the TypeError quoted below: json.dump cannot serialize a Tweet.
json.dump(tweet,jsonfile)
jsonfile.close()
TypeError: Object of type Tweet is not JSON serializable
But I get the above type error
How can I save output as JSON?
I know that running the command in a terminal creates JSON, but I wanted to write a Python version.

We'll need to convert each tweet to a dict first, as Python class objects are not serializable as JSON. Looking at the first object we can see the available methods and attributes like this: help(list_of_tweets[0]). Accessing the __dict__ of the first object we see:
# print(list_of_tweets[0].__dict__)
{'user': 'foobar',
'fullname': 'foobar',
'id': '143846459132929',
'url': '/foobar/status/1438420459132929',
'timestamp': datetime.datetime(2011, 12, 5, 23, 59, 53),
'text': 'blah blah',
'replies': 0,
'retweets': 0,
'likes': 0,
'html': '<p class="TweetTextSize...'}
Before we can dump it to json we'll need to convert the datetime objects to strings.
# Build each dict from a *copy* of the tweet's attributes. t.__dict__ is the
# object's live attribute mapping, so assigning into it directly would also
# replace the datetime stored on the Tweet object itself.
tweets = [
    dict(vars(t), timestamp=t.timestamp.isoformat())
    for t in list_of_tweets
]
Then we can use the json module to dump the data to a file.
import json

# Serialize the whole list of tweet dicts to disk as one JSON array.
with open('data.json', 'w') as f:
    json.dump(tweets, f)

Related

Python discord bot command with json database

So I'm making a command that is !command
When you type that in a chat it will update the member(s) score in the database.
The database will look something like this
{
"person": "epikUbuntu",
"score": "22"
}
How would I go on doing that?
edit:
If I wasn't clear, I meant: how would I go about doing the Python part of it?
JSON objects in python work like dictionaries.
You can write a new dictionary and save it:
data = {"foo": "bar"}
# "w" creates the file or truncates an existing one; read access is not needed.
with open("file.json", "w") as fp:
    json.dump(data, fp, sort_keys=True, indent=4)  # kwargs for beautification
And you can load data from a file (this will be for updating the file):
with open("file.json", "r") as fp:
    data = json.load(fp)  # loading json contents into data variable - this will be a dict
data["foo"] = "baz"  # updating values
data["bar"] = "foo"  # writing new values
# Re-open in write mode so the old contents are replaced by the updated dict.
with open("file.json", "w") as fp:
    json.dump(data, fp, sort_keys=True, indent=4)
Discord.py example:
import json
import os  # for checking the file exists


def add_score(member: "discord.Member", amount: int) -> int:
    """Add *amount* to the member's score in ``file.json`` and return the new score.

    The file maps ``str(member.id)`` to a dict holding at least a ``"score"``
    key. A missing file or a missing member entry is created on the fly.

    :param member: object whose ``id`` identifies the entry to update
    :param amount: value to add to the stored score
    :return: the member's score after the update
    """
    key = f"{member.id}"
    if os.path.isfile("file.json"):
        with open("file.json", "r") as fp:
            data = json.load(fp)
    else:
        data = {}
    # setdefault keeps any other fields already stored for this member.
    entry = data.setdefault(key, {})  # add other things you want to store
    entry["score"] = entry.get("score", 0) + amount
    # saving the file outside of the if statements saves us having to write it twice
    with open("file.json", "w") as fp:
        json.dump(data, fp, sort_keys=True, indent=4)  # kwargs for beautification
    return entry["score"]
def get_score(member: "discord.Member"):
    """Return the score stored for *member* in ``file.json``.

    :param member: object whose ``id`` identifies the entry to read
    :return: the value stored under the member's ``"score"`` key
    :raises FileNotFoundError: if ``file.json`` does not exist
    :raises KeyError: if the member has no entry (or no score) in the file
    """
    with open("file.json", "r") as fp:
        data = json.load(fp)
    return data[f"{member.id}"]["score"]
@bot.command()
async def cmd(ctx):
    """Example command: give the caller 10 points and report the new total."""
    # some code here
    add_score(ctx.author, 10)
    # 10 is just an example
    # you can use the random module if you want - random.randint(x, y)
    await ctx.send(f"You now have {get_score(ctx.author)} score!")
References:
Context managers
Dictionaries
Loading data from a json
Writing to a json

Cannot write dict object as correct JSON to file

I try to read JSON from file, get values, transform them and back write to new file.
{
"metadata": {
"info": "important info"
},
"timestamp": "2018-04-06T12:19:38.611Z",
"content": {
"id": "1",
"name": "name test",
"objects": [
{
"id": "1",
"url": "http://example.com",
"properties": [
{
"id": "1",
"value": "1"
}
]
}
]
}
}
Above is a JSON that I read from file.
Below I attach a python program that gets values, creates new JSON and write it to file.
import json
from pprint import pprint
# Parse the JSON file at file_name and return the resulting object.
def load_json(file_name):
return json.load(open(file_name))
# NOTE: in the three getters below the parameter named "json" shadows the json module.
def get_metadata(json):
return json["metadata"]
def get_timestamp(json):
return json["timestamp"]
def get_content(json):
return json["content"]
# Returns a JSON *string* (the result of json.dumps), not a dict.
def create_json(metadata, timestamp, content):
dct = dict(__metadata=metadata, timestamp=timestamp, content=content)
return json.dumps(dct)
# json.dump serializes json_content once more; the main block below passes in
# the string from create_json, so the file ends up holding a quoted, escaped string.
def write_json_to_file(file_name, json_content):
with open(file_name, 'w') as file:
json.dump(json_content, file)
STACK_JSON = 'stack.json';
STACK_OUT_JSON = 'stack-out.json'
if __name__ == '__main__':
json_content = load_json(STACK_JSON)
print("Loaded JSON:")
print(json_content)
metadata = get_metadata(json_content)
print("Metadata:", metadata)
timestamp = get_timestamp(json_content)
print("Timestamp:", timestamp)
content = get_content(json_content)
print("Content:", content)
created_json = create_json(metadata, timestamp, content)
print("\n\n")
print(created_json)
write_json_to_file(STACK_OUT_JSON, created_json)
But the problem is that the created JSON is not correct. As the final result I get:
"{\"__metadata\": {\"info\": \"important info\"}, \"timestamp\": \"2018-04-06T12:19:38.611Z\", \"content\": {\"id\": \"1\", \"name\": \"name test\", \"objects\": [{\"id\": \"1\", \"url\": \"http://example.com\", \"properties\": [{\"id\": \"1\", \"value\": \"1\"}]}]}}"
That is not what I want to achieve. It's not correct JSON. What am I doing wrong?
Solution:
Change the write_json_to_file(...) method like this:
def write_json_to_file(file_name, json_content):
    """Write an already-serialized JSON string to *file_name* verbatim.

    :param file_name: path of the file to create or overwrite
    :param json_content: JSON text (a str); it is written as-is, without
        being serialized a second time
    """
    with open(file_name, 'w') as out_file:
        out_file.write(json_content)
Explanation:
The problem is, that when you're calling write_json_to_file(STACK_OUT_JSON, created_json) at the end of your script, the variable created_json contains a string - it's the JSON representation of the dictionary created in the create_json(...) function. But inside the write_json_to_file(file_name, json_content), you're calling:
json.dump(json_content, file)
You're telling the json module to write the JSON representation of the variable json_content (which contains a string) into the file. The JSON representation of a string is a single value enclosed in double quotes ("), with every double quote it contains escaped by \.
What you want to achieve is to simply write the value of the json_content variable into the file and not have it first JSON-serialized again.
Problem
You're converting a dict into JSON and then, right before you write it to a file, you're converting it into JSON again. When you serialize an already-serialized JSON string a second time, you get \" because every " inside it is escaped: the whole string is treated as a single value.
How to solve it?
It's a great idea to read the JSON file, convert it into a dict and perform all your operations on that. Convert back to JSON only when you want to print output, write to a file or return a result, because json.dump() is relatively expensive: it adds roughly 2 ms of overhead, which might not seem like much, but when the rest of your code runs in 500 microseconds that is about four times as long.
Other Recommendations
After seeing your code, I realize you're coming from a Java background. In Java, getThis() or getThat() accessors are a good way to modularize code, because code is organized into classes; in Python they just hurt the readability of the code, as mentioned in the PEP 8 style guide for Python.
I've updated the code below:
import json
def get_contents_from_json(file_path) -> "dict | None":
    """
    Reads the contents of the json file into a dict
    :param file_path: path of the JSON file to read
    :return: A dictionary of all contents in the file, or None when the file
        is missing or is not valid JSON (a message is printed in that case).
    """
    try:
        # JSON files are UTF-8 by convention; do not rely on the locale default.
        with open(file_path, encoding='utf-8') as file:
            return json.load(file)
    except json.JSONDecodeError:
        print('Error while reading json file')
    except FileNotFoundError:
        print(f'The JSON file was not found at the given path: \n{file_path}')
    # Make the failure result explicit instead of falling off the end.
    return None
def write_to_json_file(metadata, timestamp, content, file_path):
    """
    Creates a dict of all the data and then writes it into the file
    :param metadata: The meta data
    :param timestamp: the timestamp
    :param content: the content
    :param file_path: The file in which json needs to be written
    :return: None
    """
    output_dict = dict(metadata=metadata, timestamp=timestamp, content=content)
    # ensure_ascii=False emits non-ASCII characters unescaped, so the file must
    # be opened with an encoding that can represent them on every platform.
    with open(file_path, 'w', encoding='utf-8') as outfile:
        json.dump(output_dict, outfile, sort_keys=True, indent=4, ensure_ascii=False)
def main(input_file_path, output_file_path):
    """
    Loads the input json, prints its parts and writes them to the output file
    :param input_file_path: path of the json file to read
    :param output_file_path: path of the json file to write
    :return: None
    """
    # get a dict from the loaded json
    data = get_contents_from_json(input_file_path)
    if data is None:
        # The loader returns None when the file is missing or malformed;
        # stop here instead of dumping and subscripting None below.
        print('There is something wrong with the loaded data')
        return
    # the print() supports multiple args so you don't need multiple print statements
    print('JSON:', json.dumps(data), 'Loaded JSON as dict:', data, sep='\n')
    try:
        # load your data from the dict instead of the methods since it's more pythonic
        metadata = data['metadata']
        timestamp = data['timestamp']
        content = data['content']
        # just cumulating your print statements
        print("Metadata:", metadata, "Timestamp:", timestamp, "Content:", content, sep='\n')
        # write your json to the file.
        write_to_json_file(metadata, timestamp, content, output_file_path)
    except KeyError:
        print('Could not find proper keys to in the provided json')
    except TypeError:
        print('There is something wrong with the loaded data')


if __name__ == '__main__':
    main('stack.json', 'stack-out.json')
Advantages of the above code:
More Modular and hence easily unit testable
Handling of exceptions
Readable
More pythonic
Comments because they are just awesome!

Flask TypeError 'is not JSON serializable' - nested dictionary

i am using Flask as framework for my server, and while returning a response i get the following error:
> Traceback (most recent call last):
File "C:\Python27\lib\site-packages\flask\app.py", line 1612, in full_dispatch_request
rv = self.dispatch_request()
File "C:\Python27\lib\site-packages\flask\app.py", line 1598, in dispatch_request
return self.view_functions[rule.endpoint](**req.view_args)
File "C:\Python27\lib\site-packages\flask_restful\__init__.py", line 480, in wrapper
resp = resource(*args, **kwargs)
File "C:\Python27\lib\site-packages\flask\views.py", line 84, in view
return self.dispatch_request(*args, **kwargs)
File "C:\Python27\lib\site-packages\flask_restful\__init__.py", line 595, in dispatch_request
resp = meth(*args, **kwargs)
File "rest.py", line 27, in get
return jsonify(**solution)
File "C:\Python27\lib\site-packages\flask\json.py", line 263, in jsonify
(dumps(data, indent=indent, separators=separators), '\n'),
File "C:\Python27\lib\site-packages\flask\json.py", line 123, in dumps
rv = _json.dumps(obj, **kwargs)
File "C:\Python27\lib\json\__init__.py", line 251, in dumps
sort_keys=sort_keys, **kw).encode(obj)
File "C:\Python27\lib\json\encoder.py", line 209, in encode
chunks = list(chunks)
File "C:\Python27\lib\json\encoder.py", line 434, in _iterencode
for chunk in _iterencode_dict(o, _current_indent_level):
File "C:\Python27\lib\json\encoder.py", line 408, in _iterencode_dict
for chunk in chunks:
File "C:\Python27\lib\json\encoder.py", line 332, in _iterencode_list
for chunk in chunks:
File "C:\Python27\lib\json\encoder.py", line 332, in _iterencode_list
for chunk in chunks:
File "C:\Python27\lib\json\encoder.py", line 442, in _iterencode
o = _default(o)
File "C:\Python27\lib\site-packages\flask\json.py", line 80, in default
return _json.JSONEncoder.default(self, o)
File "C:\Python27\lib\json\encoder.py", line 184, in default
raise TypeError(repr(o) + " is not JSON serializable")
TypeError: {'origin': u'porto', 'dest': u'lisboa', 'price': '31', 'date': '2017-12-23', 'url': u'https://www.google.pt/flights/#search;f=opo;t=lis;d=2017-12-23;r=2017-12-24'} is not JSON serializable
i have the following function:
from flask import Flask, request, jsonify
from flask_restful import Resource, Api
from flask_cors import CORS, cross_origin
from json import dumps
import flights
import solveProblem
app = Flask(__name__)
api = Api(app)
CORS(app)
# flask_restful resource; registered for '/flights/<string:data>' via api.add_resource below.
class Flights(Resource):
def get(self, data):
# Python 2 print statements (the traceback above shows a Python27 install).
print 'received data from client: ' + data
solution = solveProblem.solve(data)
print 'got the solution from the script! \nSOLUTION: \n'
print solution
# The TypeError in the traceback above is raised while serializing this value.
return jsonify(solution)
api.add_resource(Flights, '/flights/<string:data>')
if __name__ == '__main__':
app.run()
while debugging the problem, i found the following solutions which did not work:
1) return solution instead of {'solution': solution}
2) do jsonify(solution)
3) do jsonify(**solution)
none of the above worked for me;
i wonder why this happens, when i am trying to return a valid dictionary:
{'flights': [[{'origin': u'porto', 'dest': u'lisboa', 'price': '31', 'date': '2017-12-23', 'url': u'https://www.google.pt/flights/#search;f=opo;t=lis;d=2017-12-23;r=2017-12-24'}]], 'cost': '31'}
any help is appreciated.
Thanks
My guess is when you were creating 'solution', the data that got assigned to it was an incorrectly formatted dictionary
{'item', 'value'}
Instead of:
{'item': 'value'}
Thus creating a set instead of a dict
We cannot use jsonify directly when trying to convert a list of data into JSON.
There are two approaches. You can convert the list into a dictionary yourself, but that means writing a conversion function, which is a complicated task.
The smarter option is to use the Marshmallow library: it serializes your list data, after which you can use jsonify.
In flask-restful, Resource class get method will just need to return python data structure. So just remove jsonify. For User Defined Object, you can use marshal_with() decorator.
See more: https://flask-restful.readthedocs.io/en/latest/quickstart.html#a-minimal-api
Since most of your functions are declared elsewhere, I worked a toy Flask program just to pass the dictionary you got stuck with.
[Edit] Before I was using the standard python json module. I edited it to use flask's own jsonify, and it works with the direct dictionary still. So the error is not where the OP is looking for.
{'flights': [[{'origin': u'porto', 'dest': u'lisboa', 'price': '31', 'date': '2017-12-23', 'url': u'https://www.google.pt/flights/#search;f=opo;t=lis;d=2017-12-23;r=2017-12-24'}]], 'cost': '31'}
The following program runs and returns the dictionary as a JSON object:
import flask

app = flask.Flask(__name__)


@app.route('/')
def hello():
    """Return the sample flight dictionary as a JSON response."""
    jdic = flask.jsonify( {'origin': u'porto', 'dest': u'lisboa', 'price': '31', 'date': '2017-12-23', 'url': u'https://www.google.pt/flights/#search;f=opo;t=lis;d=2017-12-23;r=2017-12-24'} )
    return jdic


if __name__ == '__main__':
    app.run()
As I found out, this error generally occurs when the response is not a pure python dictionary. This happened to me because I was trying to pass a class object. So, to solve the problem, i created a class method which returns a dictionary describing the object, and use this to create the json response.
Conclusion: Use Pure python objects, which are easily translated to JSON.
I had the same problem with a 3 level Nested Dictionary; it was valid, json serializable and via command line json.dumps had no issue. However, Flask did not want to output it: "TypeError", not json serializable. The only difference is that I am using Python 3.5.
So I made a copy of it as a string (that on command line was json serializable!) and passed to Flask output, it worked.
Try to pass the nested json as
eval(str(solution))
and see the error. It's not a definitive solution but more a workaround.
Hope it helps.

Scraping Data from JSON

How to scrape this data,
http://jsonviewer.stack.hu/#http://91.134.133.185:5000/viaroute?loc=25.299919,55.376774&loc=25.298738,55.369181
and extract only "total_time" to a file?
It should be fairly easy to achieve this with a little search.
You just have to find some modules to work with json, dataframes and text files, and learn how to use them.
Steps:
1 - read the JSON data using pandas.read_json()
2 - set data = df['total_time']
3 - write data using DataFrame.to_csv()
Simple as py.
Documentation:
http://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_json.html
http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.to_csv.html
import json

json_string = '''Json data here'''
data = json.loads(json_string)
total_time = data["route_summary"]["total_time"]
# Use a context manager so the file is closed even if the write fails.
with open("file_name_here.txt", "w") as f:
    f.write(str(total_time))
I've written this program for you:
# Python 2 script (urllib2 does not exist in Python 3).
import json
import urllib2

url = 'http://91.134.133.185:5000/viaroute?loc=25.299919,55.376774&loc=25.298738,55.369181'
response = urllib2.urlopen(url)
data = json.load(response)
tot_time = str(data['route_summary']['total_time'])
s = tot_time + "\n"
outfile = "C:\\Users\\USER\\Desktop\\outfile.txt"
# "a+" appends, so every run adds one more observation to the file.
with open(outfile, "a+") as f:
    f.write(s)
It'll append each observation to the end of outfile.txt
Saving json data to a file and reading that file
import json, urllib2
url = 'http://91.134.133.185:5000/viaroute?loc=25.299919,55.376774&loc=25.298738,55.369181'
response = urllib2.urlopen(url)
data = json.load(response)
outfile = "C:\\Users\\USER\\Desktop\\outfile.txt"
#saving json to file
with open(outfile, "w") as f:
f.write(str(data))
#reading file with json data
with open(outfile, 'r') as g:
json_data = g.readline()
print json_data
#Output:
{u'route_geometry': u'{_ego#m}|rhBpBaBvHuC`EuArEUtEtAlDvEnD`MlDvMli#hsEfFzn#QlTgNhwCs#fKwBhF', u'status': 0, u'via_indices': [0, 15], u'route_summary': {u'total_time': 101, u'end_point': u'', u'start_point': u'', u'total_distance': 871}, u'route_name': [u'', u''], u'hint_data': {u'checksum': 326195011, u'locations': [u'AXQDAP____8AAAAABwAAABEAAAAYAAAAIwIAAERwAgAAAAAADgyCAef7TAMCAAEB', u'bOsDAP____8AAAAAAwAAAAcAAADFAQAAFAAAAEJwAgAAAAAANQeCAd3dTAMFAAEB']}, u'via_points': [[25.299982, 55.376873], [25.29874, 55.369179]], u'status_message': u'Found route between points', u'found_alternative': False}

Serialize in JSON a base64 encoded data

I'm writing a script to automate data generation for a demo and I need to serialize in a JSON some data. Part of this data is an image, so I encoded it in base64, but when I try to run my script I get:
Traceback (most recent call last):
File "lazyAutomationScript.py", line 113, in <module>
json.dump(out_dict, outfile)
File "/usr/lib/python3.4/json/__init__.py", line 178, in dump
for chunk in iterable:
File "/usr/lib/python3.4/json/encoder.py", line 422, in _iterencode
yield from _iterencode_dict(o, _current_indent_level)
File "/usr/lib/python3.4/json/encoder.py", line 396, in _iterencode_dict
yield from chunks
File "/usr/lib/python3.4/json/encoder.py", line 396, in _iterencode_dict
yield from chunks
File "/usr/lib/python3.4/json/encoder.py", line 429, in _iterencode
o = _default(o)
File "/usr/lib/python3.4/json/encoder.py", line 173, in default
raise TypeError(repr(o) + " is not JSON serializable")
TypeError: b'iVBORw0KGgoAAAANSUhEUgAADWcAABRACAYAAABf7ZytAAAABGdB...
...
BF2jhLaJNmRwAAAAAElFTkSuQmCC' is not JSON serializable
As far as I know, a base64-encoded-whatever (a PNG image, in this case) is just a string, so it should pose no problem for serialization. What am I missing?
You must be careful about the datatypes.
If you read a binary image, you get bytes.
If you encode these bytes in base64, you get ... bytes again! (see documentation on b64encode)
json can't handle raw bytes, that's why you get the error.
I have just written some example, with comments, I hope it helps:
from base64 import b64encode
from json import dumps

ENCODING = 'utf-8'
IMAGE_NAME = 'spam.jpg'
JSON_NAME = 'output.json'

# first: reading the binary stuff
# note the 'rb' flag
# result: bytes
with open(IMAGE_NAME, 'rb') as open_file:
    byte_content = open_file.read()

# second: base64 encode read data
# result: bytes (again)
base64_bytes = b64encode(byte_content)

# third: decode these bytes to text
# result: string (in utf-8)
base64_string = base64_bytes.decode(ENCODING)

# optional: doing stuff with the data
# result here: some dict
raw_data = {IMAGE_NAME: base64_string}

# now: encoding the data to json
# result: string
json_data = dumps(raw_data, indent=2)

# finally: writing the json string to disk
# note the 'w' flag, no 'b' needed as we deal with text here
with open(JSON_NAME, 'w') as another_open_file:
    another_open_file.write(json_data)
Alternative solution would be encoding stuff on the fly with a custom encoder:
import json
from base64 import b64encode


class Base64Encoder(json.JSONEncoder):
    """JSON encoder that serializes binary values as base64 text.

    ``bytes`` and ``bytearray`` objects are emitted as base64 strings; every
    other unsupported type falls through to ``JSONEncoder.default``, which
    raises ``TypeError``.
    """

    # pylint: disable=method-hidden
    def default(self, o):
        if isinstance(o, (bytes, bytearray)):
            # b64encode returns bytes; the base64 alphabet is pure ASCII.
            return b64encode(o).decode('ascii')
        return super().default(o)
Having that defined you can do:
m = {'key': b'\x9c\x13\xff\x00'}
json.dumps(m, cls=Base64Encoder)
It will produce:
'{"key": "nBP/AA=="}'
What am I missing?
The error is yelling that a binary is not JSON serializable.
from base64 import b64encode
# *binary representation* of the base64 string
assert b64encode(b"binary content") == b'YmluYXJ5IGNvbnRlbnQ='
# base64 string
assert b64encode(b"binary content").decode('utf-8') == 'YmluYXJ5IGNvbnRlbnQ='
The latter is definitely "JSON serializable" because is the base64 string representation of the binary b"binary content".