how to handle value errors in while 1 loop with scheduling it with a Timer class - json

i have written a code to read registers of a modbus communication protocol. i have attached the code below as well.. am able to overcome the i/o errors by exception handling method where as the value error that i get , am not able throw that error and move on.
Basically what i am doing is am reading the data in the registers and sending up to the server. but my requirement is i have to read the values every second and for 24 hours. so i need to build a robust system that will overcome these value errors and continue executing the threads i have created.
the code to read registers is given below :
import minimalmodbus
import serial
from ctypes import *
import struct
import time
minimalmodbus.BAUDRATE = 9600
minimalmodbus.PARITY = serial.PARITY_NONE
minimalmodbus.BYTESIZE = 8
minimalmodbus.TIMEOUT=5
minimalmodbus.CLOSE_PORT_AFTER_EACH_CALL = True
energy_meter = serial.Serial("/dev/ttyUSB0", baudrate=9600,
parity=serial.PARITY_NONE,
stopbits=serial.STOPBITS_ONE, bytesize=serial.EIGHTBITS, timeout=5)
energy_meter = minimalmodbus.Instrument('/dev/ttyUSB0', 2, mode='rtu')
#energy_meter.debug = True
def convert_in_float(value1, value2):
raw = struct.pack('>HH',value1,value2)
ans = struct.unpack('>f', raw)[0]
return ans
def sdm630():
parameter_list1 = [ 0 ] * 0x12
parameter_list2 = [ 0 ] * 3
parameter_list3 = [ 0 ] * 6
#print energy_meter
error = 0
try:
index = 0
read_values1 = energy_meter.read_registers( 0 , 0x24, 4)
for i in range ( 0, 0x24, 2):
parameter_list1[index] = convert_in_float( read_values1[i], read_values1[i+1])
#print "Parameter read from register : ", hex(index), "is : ", parameter_list1[index] ,"\n"
index = index + 1
#read parameter list 2 & 3 in a similar way
error = 0
return error, parameter_list1, parameter_list2, parameter_list3, int(time.time())
except IOError or ValueError:
print "got error"
error = 1
return error, parameter_list1, parameter_list2, parameter_list3, int(time.time())
also, i have written a separate code to dump all data to server and is shown below :
import time
from pymongo import MongoClient
client = MongoClient('mongodb://10.32.36.40:27017')
db = client.clytics
collection = db['raspberry_pi']
def pushData(error, value1, value2, value3, value4):
if error == 0 :
temp_js = {
#variable assignment
}
temp_js_id = collection.insert(temp_js)
using the above two codes i have created threads for each function. and i only execute this code and after 20 minutes of execution , i get value errors and the program doesnt execute anymore. the main program is given below :
import time
from threading import Thread
from threading import Timer
from Queue import Queue
from modbus import sdm630
from dumpInDB import pushData
from processData import process_the_data
DELAY_SEC = 1
DELAY_MIN = 60
LOOP_LIMIT = 60
def getData(q):
error, parameter_list1, parameter_list2 , parameter_list3, parameter_list4= sdm630()
print "In getData - data:", parameter_list1, parameter_list2
q.put([error, parameter_list1, parameter_list2, parameter_list3, parameter_list4])
def processData(q1,q2):
sec_data = q1.get()
min_data = process_the_data(sec_data)
print "In processData - data:", sec_data, min_data
q2.put(min_data)
print "queue:", q2.qsize()
def putData(q):
#print "In putData - data:", value[0], value[1], value[2]
for i in range(0, q.qsize()):
value = q.get()
print "In putData - data:", value[0], value[1], value[2], value[3]
pushData( value[0], value[1] , value[2], value[3] , value[4])
def thread1(threadName, q):
i = 0
while 1:
t = Timer( DELAY_SEC, getData, args = (q,))
t.start()
time.sleep(DELAY_SEC)
def thread2( threadName, q1,q2):
i = 0
print "in thread2"
while 1:
t = Timer( DELAY_SEC, processData, args = (q1,q2,))
t.start()
time.sleep(DELAY_SEC)
def thread3( threadName, q):
i = 0
print "in thread3"
while 1:
t = Timer( DELAY_MIN, putData, args = (q,))
t.start()
print "schedule time - min"
time.sleep(DELAY_MIN)
queue_second = Queue()
queue_minute = Queue()
thread1 = Thread( target=thread1, args=("Thread-1", queue_second) )
thread2 = Thread( target=thread2, args=("Thread-2", queue_second, queue_minute) )
thread3 = Thread( target=thread3, args=("Thread-3", queue_minute) )
thread1.start()
thread2.start()
thread3.start()
thread1.join()
thread2.join()
thread3.join()
am stuck with this error. shown below :
minimalmodbus.Instrument<id=0xb6b2d9b8, address=2, mode=rtu, close_port_after_each_call=True, precalculate_read_size=True, debug=False, serial=Serial<id=0xb6b482f0, open=False>(port='/dev/ttyUSB0', baudrate=9600, bytesize=8, parity='N', stopbits=1, timeout=5, xonxoff=False, rtscts=False, dsrdtr=False)>
Traceback (most recent call last):
File "topScript.py", line 7, in <module>
from modbus import sdm630
File "/home/pi/scripts/modbus.py", line 60, in <module>
sdm630()
File "/home/pi/scripts/modbus.py", line 32, in sdm630
read_values1 = energy_meter.read_registers( 0 , 0x24, 4)
File "/usr/local/lib/python2.7/dist-packages/minimalmodbus.py", line 498, in read_registers
numberOfRegisters=numberOfRegisters, payloadformat='registers')
File "/usr/local/lib/python2.7/dist-packages/minimalmodbus.py", line 697, in _genericCommand
payloadFromSlave = self._performCommand(functioncode, payloadToSlave)
File "/usr/local/lib/python2.7/dist-packages/minimalmodbus.py", line 798, in _performCommand
payloadFromSlave = _extractPayload(response, self.address, self.mode, functioncode)
File "/usr/local/lib/python2.7/dist-packages/minimalmodbus.py", line 1075, in _extractPayload
raise ValueError(text)
ValueError: Checksum error in rtu mode: '\xa6\xe6' instead of '\xf7[' . The response is: '\xff\xf7HCeN\xce\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00?\x80\x00\x00?\x80\x00\x00?\x80\x00\x00\xa6\xe6' (plain response: '\xff\xf7HCeN\xce\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\
sometimes value error keeps popping up for sometime and finally fails and gives a message saying no more threads can be created.(reached to maximum level)

Your syntax to handle multiple exceptions is wrong. Use something like:
except (ValueError, IOError):
For more details see the Python tutorial https://docs.python.org/2/tutorial/errors.html

Related

CSV read into MySQLdb failing

I am having a problem with reading my csv file into the MySQL database. I have tried a number of solutions, but the errors just keep changing and the code isn't working. This same code had worked with another csv file, so I'm thinking I might be doing something wrong with this one?
Here is my code
from database_access import *
from builtins import bytes, int, str
import codecs
import csv
import requests
from urllib.parse import urlparse, urljoin
from bs4 import BeautifulSoup
import re
import cgi
import MySQLdb
import chardet
# from database_access import *
import MySQLdb
import simplejson
if __name__ == '__main__':
with open("SIMRA.csv",'r') as file:
reader = csv.reader(file)
#reader = csv.reader(text)
next(reader, None)
print ("project running")
#print (row[7])
#rowlist = []
all_links = []
all_project_ids = []
for row in reader:
if row[7] != "" and row[16] != "":
country = row[2]
city = row[8]
description = row[11] + '' + row[12]
title = row[7].replace("'", "''")
link = row[16]
#date_start = row[9]
#print a check here
print(title,description,country, city, link)
db = MySQLdb.connect(host, username, password, database, charset='utf8')
cursor = db.cursor()
new_project = True
proj_check = "SELECT * from Projects where ProjectName like '%" + title + "%'"
#proj_check = "SELECT * from Projects where ProjectName like %s",(title,)
#cur.execute("SELECT * FROM records WHERE email LIKE %s", (search,))
cursor.execute(proj_check)
num_rows = cursor.rowcount
if num_rows != 0:
new_project = False
url_compare = "SELECT * from Projects where ProjectWebpage like '" + link + "'"
#url_compare = "SELECT * from Projects where ProjectWebpage like %s",(link,)
cursor.execute(url_compare)
num_rows = cursor.rowcount
if num_rows != 0:
new_project = False
if new_project:
project_insert = "Insert into Projects (ProjectName,ProjectWebpage,FirstDataSource,DataSources_idDataSources) VALUES (%s,%s,%s,%s)"
cursor.execute(project_insert, (title, link,'SIMRA', 5))
projectid = cursor.lastrowid
print(projectid)
#ashoka_projectids.append(projectid)
db.commit()
ins_desc = "Insert into AdditionalProjectData (FieldName,Value,Projects_idProjects,DateObtained) VALUES (%s,%s,%s,NOW())"
cursor.executemany(ins_desc, ("Description", description, str(projectid)))
db.commit()
ins_location = "Insert into ProjectLocation (Type,Country,City,Projects_idProjects) VALUES (%s,%s,%s,%s)"
cursor.execute(ins_location, ("Main", country,city, str(projectid)))
db.commit()
else:
print('Project already exists!')
print(title)
all_links.append(link)
#print out SIMRA's links to a file for crawling later
with open('simra_links', 'w', newline='') as f:
write = csv.writer(f)
for row in all_links:
columns = [c.strip() for c in row.strip(', ').split(',')]
write.writerow(columns)
When I ran this, I got the following error:
File "/usr/lib/python3.8/codecs.py", line 322, in decode
(result, consumed) = self._buffer_decode(data, self.errors, final)
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa3 in position 898: invalid start byte
I did some research and tried handling the encoding error by adding different forms of encoding, as seen here - UnicodeDecodeError: ‘utf8’ codec can’t decode byte 0xa5 in position 0: invalid start byte, and Python MySQLdb TypeError: not all arguments converted during string formatting. Added this in this in the csv open parameter -
with open("SIMRA.csv", 'r', encoding="cp437", errors='ignore') as file:
Running the code with these different encoding options came up with a different error:
MySQLdb._exceptions.ProgrammingError: not all arguments converted during bytes formatting
Further research suggested using tuples or lists in order to address this problem, so I added these in the 'select' function in the code, as suggested here - Python MySQLdb TypeError: not all arguments converted during string formatting and in the Python SQL documentation here - PythonMySqldb
So the select query became:
proj_check = "SELECT * from Projects where ProjectName like %s",(title,)
cursor.execute(proj_check)
num_rows = cursor.rowcount
if num_rows != 0:
new_project = False
url_compare = "SELECT * from Projects where ProjectWebpage like %s",(link,)
cursor.execute(url_compare)
num_rows = cursor.rowcount
if num_rows != 0:
new_project = False
When I ran the code, I came up with this Assertion Error and I have no idea what to do anymore.
File "/home/ros/.local/lib/python3.8/site-packages/MySQLdb/cursors.py", line 205, in execute
assert isinstance(query, (bytes, bytearray))
AssertionError
I have run out of ideas. It might be that I'm missing something small, but I can't figure this out now as I've been battling with this for two days now.
Can anyone help point out what I'm missing? It will be greatly appreciated. This code ran perfectly with another csv file. I am running this with Python 3.8 btw.
Have solved this now. I had to use a different encoding with the original code and this solved the problem. So, I changed the csv open parameter to:
with open("SIMRA.csv",'r', encoding="ISO-8859-1") as file:
reader = csv.reader(file)
Were you expecting £? You need to specify what the encoding of the file is. It may be "latin1". See the syntax of LOAD DATA for how to specify CHARACTER SET latin1.

How to get minutes/hours/seconds from a django request?

I wanna retrieve time fields separately from a request. But when I try to process values, HttpResponse gives 406 status code.
def ReservationActions(request):
if(request.method == 'POST'):
body_as_json = json.loads(request.body)
try:
veh = Vehicles.objects.get(id = body_as_json['vehic__id'])
sp = ParkingSpots.objects.get(id = body_as_json['spot__id'])
reserve = Reservations(vehic = veh, spot = sp, start_date = body_as_json.get('start'), end_date = body_as_json.get('end'))
if(reserve.start_date.datetime.minute % 15 is not 0): #this is where it goes to "except" part
return HttpResponse(status=306)
reserve.save()
# Return a "created" (201) status code.
return HttpResponse(status=201)
except:
# Return a "not acceptable" (406) status code.
return HttpResponse(status=406)
this is the json object I send:
{
"vehic__id": 1,
"spot__id": 1,
"start": "2018-03-29T23:00:00.999Z",
"end" : "2018-03-30T23:00:30.000Z"
}
You parse it to a datetime object, for example with the python-dateutil [PyPi]. For example with:
from dateutil.parser import parse as dtparse
try:
start = dtparse(body_as_json.get('start'))
except (ValueError, TypeError):
# ... (invalid date, or not a string)
pass
In case this does not raise an exception, start is a datetime object. Then the start. So you can check this with:
if not start.minute % 15:
# minute is 0, 15, 30, or 45
pass
else:
# not the case
pass

Runtime error : SWIG std::function invocation failed, in azure databricks

While using the Routing solver of the google or-tools,a runtime error is thrown. There was a no changes made in the code segment,before and after getting this error. Previously, it was working. But recently after a DB connection modification was made, I am getting this error.
(Although, I doubt how a dB connection modification could affect the routing solver)
I am using the Azure Databricks notebook. As I am new to operations research, I have taken the example given in the https://developers.google.com/optimization/routing/pickup_delivery#complete_programs page, as my reference.
This is Vehicle Routing with Pick and Delivery problem.
from __future__ import print_function
from ortools.constraint_solver import routing_enums_pb2
from ortools.constraint_solver import pywrapcp
def create_data_model():
"""Stores the data for the problem."""
data = {}
data['distance_matrix'] = dist
data['pickups_deliveries'] = nodes_pickup_delivery
data['num_vehicles'] = 2
data['depot'] = 0
return data
solution_list = []
def print_solution(data, manager, routing, assignment):
"""Prints assignment on console."""
total_distance = 0
for vehicle_id in range(data['num_vehicles']):
index = routing.Start(vehicle_id)
plan_output = 'Route for vehicle {}:\n'.format(vehicle_id)
route_distance = 0
i = []
while not routing.IsEnd(index):
i.append(manager.IndexToNode(index))
plan_output += ' {} -> '.format(str(cityList[manager.IndexToNode(index)]))
previous_index = index
index = assignment.Value(routing.NextVar(index))
route_distance += routing.GetArcCostForVehicle(previous_index, index, vehicle_id)
solution_list.append(i)
plan_output += '{}\n'.format(str(cityList[manager.IndexToNode(index)]))
plan_output += 'Distance of the route: {} miles\n'.format(route_distance)
#print(plan_output)
total_distance += route_distance
#print('Total Distance of all routes: {} miles'.format(total_distance))
def main():
"""Entry point of the program."""
# Instantiate the data problem.
data = create_data_model()
# Create the routing index manager.
manager = pywrapcp.RoutingIndexManager(len(data['distance_matrix']), data['num_vehicles'], data['depot'])
# Create Routing Model.
routing = pywrapcp.RoutingModel(manager)
# Define cost of each arc.
def distance_callback(from_index, to_index):
"""Returns the manhattan distance between the two nodes."""
# Convert from routing variable Index to distance matrix NodeIndex.
from_node = manager.IndexToNode(from_index)
to_node = manager.IndexToNode(to_index)
return data['distance_matrix'][from_node][to_node]
transit_callback_index = routing.RegisterTransitCallback(distance_callback)
routing.SetArcCostEvaluatorOfAllVehicles(transit_callback_index)
# Add Distance constraint.
dimension_name = 'Distance'
routing.AddDimension(
transit_callback_index,
0, # no slack
40, # vehicle maximum travel distance
True, # start cumul to zero
dimension_name)
distance_dimension = routing.GetDimensionOrDie(dimension_name)
distance_dimension.SetGlobalSpanCostCoefficient(100)
# Define Transportation Requests.
for request in data['pickups_deliveries']:
pickup_index = manager.NodeToIndex(request[0])
delivery_index = manager.NodeToIndex(request[1])
routing.AddPickupAndDelivery(pickup_index, delivery_index)
routing.solver().Add(routing.VehicleVar(pickup_index) == routing.VehicleVar(delivery_index))
routing.solver().Add(distance_dimension.CumulVar(pickup_index) <= distance_dimension.CumulVar(delivery_index))
# Setting first solution heuristic.
search_parameters = pywrapcp.DefaultRoutingSearchParameters()
#search_parameters.time_limit.seconds = 90
search_parameters.first_solution_strategy = (routing_enums_pb2.FirstSolutionStrategy.PARALLEL_CHEAPEST_INSERTION)
# Solve the problem.
assignment = routing.SolveWithParameters(search_parameters)
# Print solution on console.
if assignment:
print_solution(data, manager, routing, assignment)
if __name__ == '__main__':
main()
The error I am getting is pointing to the following code segment: 'plan_output = 'Route for vehicle {}:\n'.format(vehicle_id)'
The error thrown is:
RuntimeError: SWIG std::function invocation failed.
RuntimeErrorTraceback (most recent call last)
<command-2714173895177597> in <module>()
89
90 if __name__ == '__main__':
---> 91 main()
<command-2714173895177597> in main()
85 # Print solution on console.
86 if assignment:
---> 87 print_solution(data, manager, routing, assignment)
88
89
<command-2714173895177597> in print_solution(data, manager, routing, assignment)
18 for vehicle_id in range(data['num_vehicles']):
19 index = routing.Start(vehicle_id)
---> 20 plan_output = 'Route for vehicle {}:\n'.format(vehicle_id)
21 route_distance = 0
22 i = []
RuntimeError: SWIG std::function invocation failed.
Kindly help.
def create_data_model():
"""Stores the data for the problem."""
data = {}
data['distance_matrix'] = dist
data['pickups_deliveries'] = nodes_pickup_delivery
data['num_vehicles'] = 2
data['depot'] = 0 #Dummy location
return data
solution_list = []
def print_solution(data, manager, routing, assignment):
"""Prints assignment on console."""
total_distance = 0
for vehicle_id in range(data['num_vehicles']):
index = routing.Start(vehicle_id)
plan_output = 'Route for vehicle {}:\n'.format(vehicle_id)
route_distance = 0
i = []
while not routing.IsEnd(index):
i.append(manager.IndexToNode(index))
plan_output += ' {} -> '.format(manager.IndexToNode(index))
previous_index = index
index = assignment.Value(routing.NextVar(index))
route_distance += routing.GetArcCostForVehicle(previous_index, index, vehicle_id)
solution_list.append(i)
plan_output += '{}({})\n'.format(str(cityList[manager.IndexToNode(index)]))
plan_output += 'Distance of the route: {} miles\n'.format(route_distance)
print(plan_output)
total_distance += route_distance
print('Total Distance of all routes: {} miles'.format(total_distance))

Inserting cipher text into mysql using python

So i have a program which will encrypt a string using AES and generate cipher which in bytes[].
I wish to store this cipher as it is in mysql database.
I found we could use VARBINARY data type in mysql to do so.
In what ways we could achieve so.
Here is my try to do so :
import ast
import mysql.connector
from Crypto.Cipher import AES
from Crypto.Random import get_random_bytes
def encrypt(key, msg):
iv = get_random_bytes(16)
cipher = AES.new(key, AES.MODE_CFB, iv)
ciphertext = cipher.encrypt(msg) # Use the right method here
db = iv + ciphertext
print(db)
cursor.executemany(sql_para_query,db)
print(cursor.fetchone())
connection.commit()
return iv + ciphertext
def decrypt(key, ciphertext):
iv = ciphertext[:16]
ciphertext = ciphertext[16:]
cipher = AES.new(key, AES.MODE_CFB, iv)
msg = cipher.decrypt(ciphertext)
return msg.decode("utf-8")
if __name__ == "__main__":
connection = mysql.connector.connect(host = "localhost", database = "test_db", user = "sann", password = "userpass",use_pure=True)
cursor = connection.cursor(prepared = True)
sql_para_query = """insert into test1 values(UNHEX(%s)) """
ed = input("(e)ncrypt or (d)ecrypt: ")
key = str(1234567899876543)
if ed == "e":
msg = input("message: ")
s= encrypt(key, msg)
print("Encrypted message: ", s)
file = open("e_tmp","wb+")
file.write(s)
print(type(s))
elif ed == "d":
#smsg = input("encrypted message: ")
#file = open("e_tmp","rb")
#smsg = file.read()
#print(type(smsg))
sql_para_query = """select * from test1"""
cursor.execute(sql_para_query)
row = cursor.fetchone()
print(row)
#smsg = str(smsg)
#msg = ast.literal_eval(smsg)
#print(msg)
#print(type(msg))
#s=decrypt(key, msg)
#print("Decrypted message: ", s)
#print(type(s))
Error I'm getting :
Traceback (most recent call last): File
"/home/mr_pool/.local/lib/python3.6/site-packages/mysql/connector/cursor.py",
line 1233, in executemany
self.execute(operation, params) File "/home/mr_pool/.local/lib/python3.6/site-packages/mysql/connector/cursor.py",
line 1207, in execute
elif len(self._prepared['parameters']) != len(params): TypeError: object of type 'int' has no len()
During handling of the above exception, another exception occurred:
Traceback (most recent call last): File "tmp1.py", line 36, in
s= encrypt(key, msg) File "tmp1.py", line 14, in encrypt
cursor.executemany(sql_para_query,db) File "/home/mr_pool/.local/lib/python3.6/site-packages/mysql/connector/cursor.py",
line 1239, in executemany
"Failed executing the operation; {error}".format(error=err)) mysql.connector.errors.InterfaceError: Failed executing the operation;
object of type 'int' has no len()
Any other alternatives are also welcome.
My ultimate goal is to store the encrypted text in database.
I reproduced your error, but it seems there are more errors in your code.
The key as well as the message are strings, therefore I got this error:
TypeError: Object type <class 'str'> cannot be passed to C code
Which I fixed by encoding them in utf-8:
# line 38:
key = str(1234567899876543).encode("utf8")
# .... line 41:
s= encrypt(key, msg.encode("utf8"))
The UNHEX function in your SQL Query is not needed because we are entering the data as VARBINARY. You can change your statement to:
"""insert into test1 values(%s) """
The function executemany() can be replaced by execute() because you are only entering one statement. However I will write the solution for using both, execute or executemany.
insert with execute():
From the documentation:
cursor.execute(operation, params=None, multi=False)
iterator = cursor.execute(operation, params=None, multi=True)
This method executes the given database operation (query or command). The parameters found in the tuple or dictionary params are bound to the variables in the operation. Specify variables using %s or %(name)s parameter style (that is, using format or pyformat style). execute() returns an iterator if multi is True.
https://dev.mysql.com/doc/connector-python/en/connector-python-api-mysqlcursor-execute.html
So we need just to build a tuple with your parameters by changing the cursor.execute line to:
cursor.execute(sql_para_query, (db, ))
insert with executemany():
From the documentation:
cursor.executemany(operation, seq_of_params)
This method prepares a database operation (query or command) and executes it against all parameter sequences or mappings found in the sequence seq_of_params.
https://dev.mysql.com/doc/connector-python/en/connector-python-api-mysqlcursor-executemany.html
Therefore we need to build a sequence with values you'd like to insert. In your case just one value:
cursor.executemany(sql_para_query, [(db, )])
To insert multiple values, you can add as many tuples into your sequence as you want.
full code:
import ast
import mysql.connector
from Crypto.Cipher import AES
from Crypto.Random import get_random_bytes
def encrypt(key, msg):
iv = get_random_bytes(16)
cipher = AES.new(key, AES.MODE_CFB, iv)
ciphertext = cipher.encrypt(msg) # Use the right method here
db = iv + ciphertext
cursor.execute(sql_para_query, (db, ))
connection.commit()
return iv + ciphertext
def decrypt(key, ciphertext):
iv = ciphertext[:16]
ciphertext = ciphertext[16:]
cipher = AES.new(key, AES.MODE_CFB, iv)
msg = cipher.decrypt(ciphertext)
return msg.decode("utf-8")
if __name__ == "__main__":
connection = mysql.connector.connect(host = "localhost", database = "test_db", user = "sann", password = "userpass",use_pure=True)
cursor = connection.cursor(prepared = True)
sql_para_query = """insert into test1 values(%s) """
ed = input("(e)ncrypt or (d)ecrypt: ")
key = str(1234567899876543).encode("utf8")
if ed == "e":
msg = input("message: ")
s= encrypt(key, msg.encode("utf8"))
print("Encrypted message: ", s)
file = open("e_tmp","wb+")
file.write(s)
print(type(s))
elif ed == "d":
sql_para_query = """select * from test1"""
cursor.execute(sql_para_query)
row = cursor.fetchone()
msg = row[0] # row is a tuple, therefore get first element of it
print("Unencrypted message: ", msg)
s=decrypt(key, msg)
print("Decrypted message: ", s)
output:
#encrypt:
(e)ncrypt or (d)ecrypt: e
message: this is my test message !!
Encrypted message: b"\x8f\xdd\xe6f\xb1\x8e\xb51\xc1'\x9d\xbf\xb5\xe1\xc7\x87\x99\x0e\xd4\xb2\x06;g\x85\xc4\xc1\xd2\x07\xb5\xc53x\xb9\xbc\x03+\xa2\x95\r4\xd1*"
<class 'bytes'>
#decrypt:
(e)ncrypt or (d)ecrypt: d
Unencrypted message: bytearray(b"\x8f\xdd\xe6f\xb1\x8e\xb51\xc1\'\x9d\xbf\xb5\xe1\xc7\x87\x99\x0e\xd4\xb2\x06;g\x85\xc4\xc1\xd2\x07\xb5\xc53x\xb9\xbc\x03+\xa2\x95\r4\xd1*")
Decrypted message: this is my test message !!

Twitter streaming script is throwing a keyerror on location field of the tweet

I have as of now written a Python script to stream tweets and I have made use of the tweepy module to do so. After streaming for around 3 minutes for tweets, I dump these tweets into a .json file. I populate these tweets (I try to) into a pandas dataframe for location and text fields of the tweet. The text field of the tweet gets populated but not for every tweet (problem 1) in the .json file and as far as the location field is concerned a keyerror (problem 2) is thrown. May I know what exactly is going wrong.
twitter_stream_dump.py
import time
import json
import pandas as pd
import re
#tweepy based modules
import tweepy
from tweepy import OAuthHandler
from tweepy import Stream
from tweepy.streaming import StreamListener
#initializing authentication credentials
consumer_key = ''
consumer_secret = ''
access_key = ''
access_secret = ''
#This is a basic listener that just prints received tweets to stdout.
class StdOutListener(StreamListener) :
def __init__(self,time_limit) :
self.start_time = time.time()
self.limit = time_limit
self.saveFile = open('requests.json','a')
super(StdOutListener,self).__init__()
def on_data(self, data) :
if ((time.time() - self.start_time) < self.limit) :
self.saveFile.write(data)
self.saveFile.write('\n')
return True
else :
self.saveFile.close()
return False
def on_error(self, status) :
print(status)
def getwords(string) :
return re.findall(r"[\w'#]+|[.,!?;]",string)
if __name__ == '__main__' :
#This handles Twitter authetification and the connection to Twitter Streaming API
auth = OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_key, access_secret)
time_limit = input("Enter the time limit in minutes : ")
time_limit *= 60
stream = Stream(auth,listener = StdOutListener(time_limit))
string = raw_input("Enter the list of keywords/hashtags to be compared : ")
keyword_list = getwords(string)
#This line filter Twitter Streams to capture data by the keywords: 'python', 'javascript', 'ruby'
stream.filter(track = keyword_list)
tweets_data_path = 'requests.json'
tweets_data = []
tweet_list = []
tweets_file = open(tweets_data_path, "r")
for line in tweets_file :
try :
tweet = json.loads(line)
tweet_list.append(tweet)
except :
continue
num_tweets_collected = len(tweet_list)
#Creates a data frame structure
tweet_dataframe = pd.DataFrame()
text_dump = open('text_dump.txt', 'w')
#Populating the location field of the data frame
#tweet_dataframe['location'] = map(lambda tweet : tweet['location'], tweet_list)
tweet_dataframe['text'] = map(lambda tweet : tweet['text'], tweet_list)
print(tweet_dataframe['text'])
Errors :
abhijeet-mohanty-2:Desktop SubrataMohanty$ python twitter_stream_dump.py
Enter the time limit in minutes : 3
Enter the list of keywords/hashtags to be compared : python ruby scala
Traceback (most recent call last):
File "twitter_stream_dump.py", line 81, in <module>
tweet_dataframe['location'] = map(lambda tweet : tweet['location'], tweet_list)
File "twitter_stream_dump.py", line 81, in <lambda>
tweet_dataframe['location'] = map(lambda tweet : tweet['location'], tweet_list)
KeyError: 'location'
requests.json (My .json file)
https://drive.google.com/file/d/0B1p05OszaBkXLWFsQ2VmeWVjbDQ/view?usp=sharing
The location field is a user-defined value and will sometimes not be present.
That's why you're getting the KeyError.
Note that location is part of the "user profile" metadata that comes with a tweet. It's intended to describe a user's location (like their hometown), and not the geotagged location of a given tweet.
In case you're interested in geotags, first check a tweeet to see if the geo_enabled field is true. If so, the geo, coordinates, and place fields may contain geotagged information.
As for missing text entries, I don't see the same issue when using the data you provided. It's possible the issue was caused by your try/except clause when reading in lines of data. Consider this approach:
for i, line in enumerate(tweets_file):
if line.rstrip():
tweet = json.loads(line)
tweet_list.append(tweet)
num_tweets_collected = len(tweet_list)
texts = [tweet['text'] for tweet in tweet_list]
tweet_dataframe = pd.DataFrame(texts, columns=['text'])
Sample output:
print(tweet_dataframe.head())
# text
# 0 Tweets and python BFF <3 15121629.976126991
# 1 RT #zeroSteiner: Can now write more post modul...
# 2 •ruby• #MtvInstagLSelena #MtvColabTaylors
# 3 Ruby Necklace July Birthstone Jewelry Rosary...
# 4 #ossia I didn't see any such thing as Python. ...
A few quick summary stats show that no lines are missing, and no entries are null:
print("N tweets: {}".format(num_tweets_collected))
# N tweets: 286
print("N rows in dataframe: {}".format(tweet_dataframe.shape[0]))
# N rows in dataframe: 286
null_count = tweet_dataframe.text.isnull().sum()
print("Tweets with no text field extracted: {}".format(null_count))
# Tweets with no text field extracted: 0