Formatting json text in discord.py bot - json

#client.command()
# NOTE(review): the line above is presumably the decorator `@client.command()`
# whose `@` was lost when the post was copied -- TODO confirm.
# Command handler: requests ban-check data for `player` from the API and, on
# HTTP 200, sends the value stored under rs["otherNames"]["usedNames"] back to
# the channel inside a Discord embed.
async def show(ctx, player, *args): # General stats
rs = requests.get(apiLink + "/checkban?name=" + str(player))
if rs.status_code == 200: # HTTP OK
rs = rs.json()
# str(...) turns the whole value into ONE string and {...} wraps that string in
# a one-element set, so join() has nothing to separate: joined_array ends up
# being exactly the str() of the value (brackets and quotes included when the
# value is a list).
joined_array = ','.join({str(rs["otherNames"]['usedNames'])})
embed = discord.Embed(title="Other users for" + str(player),
description="""User is known as:
""" +joined_array)
await ctx.send(embed=embed)
My goal here is to have every username on different lines after each comma, and preferably without the [] at the start and end. I have tried adding
joined_array = ','.join({str(rs["otherNames"]['usedNames'])}) but the response from the bot is the same as shown in the image.
Any answer or tip/suggestion is appreciated!

Try this:
# Stand-in data for the demonstration.
array = ['user1', 'user2', 'user3', 'user4', 'user5', 'user6'] #your list
# ",\n" is inserted BETWEEN items, so every item except the last is followed
# by a comma and a line break.
new = ",\n".join(array)
print(new)
Output:
user1,
user2,
user3,
user4,
user5,
user6
In your case I think array should be replaced with rs["otherNames"]['usedNames']

Related

I am trying to create a hit using html file for amazon mturk

import boto3

# Sandbox endpoint: HITs created against it are test HITs.
MTURK_SANDBOX = 'https://mturk-requester-sandbox.us-east-1.amazonaws.com'

# Credentials are deliberately NOT hard-coded here. boto3 resolves them from
# its standard chain (the AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY environment
# variables, ~/.aws/credentials, an attached role, ...).
# SECURITY: a key pair that has ever been pasted into source code or a public
# post must be treated as compromised and rotated in the AWS console.
mturk = boto3.client(
    'mturk',
    region_name='us-east-1',
    endpoint_url=MTURK_SANDBOX,
)

# Read the question payload; the context manager closes the file even if
# read() raises.
# NOTE: CreateHIT expects `Question` to be a complete XML document (for HTML
# content, an HTMLQuestion element carrying its xmlns schema declaration), not
# a bare HTML page.
with open("/home/nm6088/mturk files/prac4_Jun27/index.html", "r") as questionfile:
    questions = questionfile.read()

# Only workers whose locale is the US may see and accept the HIT.
localRequirements = [{
    'QualificationTypeId': '00000000000000000071',
    'Comparator': 'EqualTo',
    'LocaleValues': [{
        'Country': 'US'
    }],
    'RequiredToPreview': True
}]

hit = mturk.create_hit(
    Title='Write a simple version of the test',
    Description='A test HIT that requires the user to write a simple text.',
    Keywords='simple, qualification, test',
    Reward='0.01',
    MaxAssignments=1,
    LifetimeInSeconds=3600,
    AssignmentDurationInSeconds=600,
    AutoApprovalDelayInSeconds=200,
    Question=questions,
    QualificationRequirements=localRequirements
)

print("A new HIT has been created. You can preview it here:")
print("https://workersandbox.mturk.com/mturk/preview?groupId=" + hit['HIT']['HITGroupId'])
print("HITID = " + hit['HIT']['HITId'] + " (Use to Get Results)")
botocore.exceptions.ClientError: An error occurred (ParameterValidationError) when calling the CreateHIT operation: There was an error parsing the XML question or answer data in your request. Please make sure the data is well-formed and validates against the appropriate schema. Details: cvc-elt.1.a: Cannot find the declaration of element 'HTMLQuestion'. (1656352060543 s)

How to use the Wikidata API to access statements

I'm trying to get information from Wikidata. For example, to look up "cobalt-70" I use the API:
# Query the Wikidata API with action=wbsearchentities for the English text in
# `query`, asking for a JSON response.
API_ENDPOINT = "https://www.wikidata.org/w/api.php"
query = "cobalt-70"
params = {
'action': 'wbsearchentities',
'format': 'json',
'language': 'en',
'search': query
}
# `params` is sent as the URL query string of the GET request.
r = requests.get(API_ENDPOINT, params = params)
# Print the decoded JSON body of the response.
print(r.json())
So there is a "claims" field which gives access to the statements. Is there a good way to check whether a value exists in a statement? For example, "cobalt-70" has the value 0.5 in property P2114. How can I check whether a given value exists in the statements of an entity, as in this example?
Is there an approach for accessing it? Thank you!
I'm not sure this is exactly what you are looking for, but if it's close enough, you can probably modify it as necessary:
import requests
import json

# Special:EntityData returns the complete entity document, claims included.
url = 'https://www.wikidata.org/wiki/Special:EntityData/Q18844865.json'
req = requests.get(url)
# Decode the JSON body into a dict; every lookup below reads from j_dat.
j_dat = req.json()

# Every statement stored under property P2114 of entity Q18844865.
targets = j_dat['entities']['Q18844865']['claims']['P2114']
for target in targets:
    # The value of these statements is a dict; print each field and content.
    values = target['mainsnak']['datavalue']['value'].items()
    for value in values:
        print(value[0], value[1])
Output:
amount +0.5
unit http://www.wikidata.org/entity/Q11574
upperBound +0.6799999999999999
lowerBound +0.32
amount +108.0
unit http://www.wikidata.org/entity/Q723733
upperBound +115.0
lowerBound +101.0
EDIT:
To find property id by value, try:
# Print the id of every property whose first statement holds the value "+0.5".
targets = j_dat['entities']['Q18844865']['claims'].items()
for property_id, statements in targets:
    # Only the FIRST statement of each property is inspected.
    # A snak without a value has no 'datavalue' key, so use .get() and skip it
    # instead of raising KeyError.
    datavalue = statements[0]['mainsnak'].get('datavalue')
    if datavalue is None:
        continue
    line = datavalue['value']
    # Only dict-shaped values are searched; plain strings are ignored.
    if isinstance(line, dict):
        for v in line.values():
            if v == "+0.5":
                print('property: ', property_id)
Output:
property: P2114
I tried a solution that consists of searching inside the JSON object, as proposed here: https://stackoverflow.com/a/55549654/8374738. I hope it helps. Here is the idea:
import pprint
def search(d, search_pattern, prev_datapoint_path=''):
    """Return the access path of every key, index or leaf matching a pattern.

    Walks ``d`` recursively. A dict key, a list index or a scalar leaf
    "matches" when ``search_pattern`` is a substring of its ``str()``.

    Args:
        d: Nested structure of dicts, lists and scalar leaves (for example
            decoded JSON).
        search_pattern: Substring to look for.
        prev_datapoint_path: Path expression accumulated so far; callers
            normally pass the name of the root expression, e.g.
            ``'res.json()'``.

    Returns:
        A list of path strings such as ``"root['a'][0]['b']"``, in traversal
        order. Empty paths (a bare scalar matched with no root name) are
        dropped.
    """
    output = []
    if isinstance(d, dict):
        for dkey, child in d.items():
            # String keys keep the ['key'] form; other keys use repr() so the
            # path stays a valid expression and concatenation cannot fail.
            key_text = "'" + dkey + "'" if isinstance(dkey, str) else repr(dkey)
            child_path = prev_datapoint_path + "[" + key_text + "]"
            if search_pattern in str(dkey):
                output.append(child_path)
            output.extend(search(child, search_pattern, child_path))
    elif isinstance(d, list):
        for index, child in enumerate(d):
            child_path = prev_datapoint_path + "[" + str(index) + "]"
            if search_pattern in str(index):
                # Record the PATH of the matching index, not the bare integer
                # (index 0 would otherwise be removed by the falsy filter).
                output.append(child_path)
            output.extend(search(child, search_pattern, child_path))
    elif search_pattern in str(d):
        output.append(prev_datapoint_path)
    return [path for path in output if path]
And you just need to use:
pprint.pprint(search(res.json(),'0.5','res.json()'))
Output:
["res.json()['claims']['P2114'][0]['mainsnak']['datavalue']['value']['amount']"]

If statement not returning the desired result

I'm new to Python and I believe the issue with my code is being caused by the fact that I'm a newbie and there's some theory or something that I must not be familiar with yet.
Yes, this question was asked before but, is different from mine. Believe me I tried everything that I thought that needs to be done.
Everything worked until I added everything in "if five in silos" statement.
After I enter the values for the 6 input functions, the program just finishes with exit code 0. Nothing else happens. The for loop is not initiated.
I want for the code to accept either 103 or 106 when prompting to enter something for the "five" variable.
I'm using PyCharm and Python 3.7.
# Interactive script: asks for a number prefix and some ids, selects matching
# free rows from table n1 and, after a per-row confirmation, copies each row
# into table n<silo> and marks it as allocated in n1.
import mysql.connector
try:
db = mysql.connector.connect(
host="",
user="",
passwd="",
database=""
)
one = int(input("Number of requested telephone numbers: "))
two = input("Enter the prefix (4 characters) with a leading 0: ")[:4]
three = int(input("Enter the ccid: "))
four = int(input("Enter the cid: "))
six = input("Enter case number: ")
five = int(input("Enter silo (103, 106 only): "))
cursor = db.cursor()
# NOTE(review): `two` (and `six` further down) are raw input() strings that
# are interpolated straight into the SQL text -- an SQL-injection risk; the
# connector supports placeholders passed separately to execute().
cursor.execute(f"SELECT * FROM n1 WHERE ddi LIKE '{two}%' AND silo = 1 AND ccid = 0 LIMIT {one}")
# NOTE(review): the rows returned by fetchall() are discarded here, yet the
# loop below iterates `cursor` again after everything has been fetched --
# presumably why the loop body never runs.
cursor.fetchall()
silos = (103, 106)
if five in silos:
if cursor.rowcount > 0:
for row in cursor:
seven = input(f"{row[1]} has been found on our system. Do you want to continue? Type either Y or N.")
if seven == "Y":
# NOTE(review): these statements run on the same cursor that the enclosing
# loop is iterating -- TODO confirm that is intended.
cursor.execute(f"INSERT INTO n{five} (ddi, silo, ccid, campaign, assigned, allocated, "
f"internal_notes, client_notes, agentid, carrier, alias) VALUES "
f"('{row[1]}', 1, {three}, {four}, NOW(), NOW(), 'This is a test.', '', 0, "
f"'{row[13]}', '') "
f"ON DUPLICATE KEY UPDATE "
f"silo = VALUES (silo), "
f"ccid = VALUES (ccid), "
f"campaign = VALUES (campaign);")
cursor.execute(f"UPDATE n1 SET silo = {five}, internal_notes = '{six}', allocated = NOW() WHERE "
f"ddi = '{row[1]}'")
else:
print("The operation has been canceled.")
db.commit()
else:
print(f"No results for prefix {two}.")
else:
print("Enter either silo 103 or 106.")
cursor.close()
db.close()
# int() raises ValueError when one of the numeric answers is not an integer.
except (ValueError, NameError):
print("Please, enter an integer for all questions, except case number.")
Because it must be:
# Iterate over the rows that fetchall() returns, not over the cursor itself.
for row in cursor.fetchall():
    pass  # do something with row
In your code, cursor is the cursor object returned by db.cursor(); you need to call its fetchall() method to read the rows it holds.
You are actually calling cursor.fetchall() without doing anything with the result. Assign the result to a variable and then iterate over it:
# Keep the fetched rows in a variable so they can be iterated afterwards.
result = cursor.fetchall()
for row in result:
    pass  # do something with row
I found the problem: I had to store cursor.fetchall() into a variable.
After I put: eight = cursor.fetchall() before the "silos" tuple, everything worked perfectly.

Is there a way to take a list of strings and create a JSON file, where both the key and value are list items?

I am creating a python script that can read scanned, and tabular .pdfs and extract some important data and insert it into a JSON to later be implemented into a SQL database (I will also be developing the DB as a project for learning MongoDB).
Basically, my issue is I have never worked with any JSON files before but that was the format I was recommended to output to. The scraping script works, the pre-processing could be a lot cleaner, but for now it works. The issue I run into is the keys, and values are in the same list, and some of the values because they had a decimal point are two different list items. Not really sure where to even start.
I don't really know where to start, I suppose since I know what the indexes of the list are I can easily assign keys and values, but then it may not be applicable to any .pdf, that is the script cannot be coded explicitly.
import PyPDF2 as pdf2
import textract

# A plain string is not a context manager; keep the path in a variable and
# let open() provide the context manager instead.
filename = "TestSpec.pdf"

# Concatenate the embedded text layer of every page (legacy PyPDF2 API:
# PdfFileReader / numPages / getPage / extractText).
text = ""
with open(filename, 'rb') as pdfFileObj:
    pdfReader = pdf2.PdfFileReader(pdfFileObj)
    num_pages = pdfReader.numPages
    # Read page `count`, not page 0 on every pass.
    for count in range(num_pages):
        pageObj = pdfReader.getPage(count)
        text += pageObj.extractText()

# No text layer at all means the PDF is a scan: fall back to OCR.
if text == "":
    text = textract.process(filename, method="tesseract", language="eng")
def cleanText(x):
    '''
    Reduce the text extracted from a scanned PDF to the lower-cased word
    tokens found strictly between the "sheet" and "od260" markers.

    x is converted with str(), real newline characters are removed, the
    result is split on runs of non-word characters and lower-cased, and
    tokens equal to 'n' are dropped.

    Returns the list of tokens after the first "sheet" and before the first
    "od260". Raises ValueError when either marker is missing.
    '''
    # Imported locally so the function works even where the surrounding
    # script has not imported re.
    import re

    stringedText = str(x)
    withoutNewlines = stringedText.replace('\n', '')
    tokens = [word.lower() for word in re.split(r'\W+', withoutNewlines)]
    # Lone 'n' tokens are presumably what is left of the literal "\n" escapes
    # that str() produces for bytes input -- TODO confirm.
    tokens = [word for word in tokens if word != 'n']
    dexStop = tokens.index("od260")
    dexStart = tokens.index("sheet")
    return tokens[dexStart + 1:dexStop]
cleanText = cleanText(text)
This is the current output
['n21', 'feb', '2019', 'nsequence', 'lacz', 'rp', 'n5', 'gat', 'ctc', 'tac', 'cat', 'ggc', 'gca', 'cat', 'ttc', 'ccc', 'gaa', 'aag', 'tgc', '3', 'norder', 'no', '15775199', 'nref', 'no', '207335463', 'n25', 'nmole', 'dna', 'oligo', '36', 'bases', 'nproperties', 'amount', 'of', 'oligo', 'shipped', 'to', 'ntm', '50mm', 'nacl', '66', '8', 'xc2', 'xb0c', '11', '0', '32', '6', 'david', 'cook', 'ngc', 'content', '52', '8', 'd260', 'mmoles', 'kansas', 'state', 'university', 'biotechno', 'nmolecular', 'weight', '10', '965', '1', 'nnmoles']
and we want the output as a JSON setup like
{"Date | 21feb2019", "Sequence ID: | lacz-rp", "Sequence 5'-3' | gat..."}
and so on. Just not sure how to do that.
here is a screenshot of the data from my sample pdf
So, I have figured out some of this. I am still having issues grabbing the last third of the data I need without explicitly programming it in, but here is what I have so far. Once I have everything working, I will worry about optimizing and condensing it.
# for PDF reading
import PyPDF2 as pdf2
import textract
# for data preprocessing
import re
from dateutil.parser import parse
# For generating the JSON file array
import json

# This finds and opens the pdf file, reads the data, and extracts the data.
# NOTE(review): open() does not expand wildcards, so "*.pdf" looks like a
# placeholder for a real path -- TODO confirm.
filename = "*.pdf"
# The context manager closes the PDF even if reading it raises.
with open(filename, 'rb') as pdfFileObj:
    pdfReader = pdf2.PdfFileReader(pdfFileObj)
    text = ""
    pageObj = pdfReader.getPage(0)
    text += pageObj.extractText()

# If the page has no text layer (a scanned picture), let textract OCR the file.
if text == "":
    text = textract.process(filename, method="tesseract", language="eng")

# Converts text to string from byte data for preprocessing
stringedText = str(text)
# Replace escaped newlines (literal backslash + n) with actual new lines.
formattedText = stringedText.replace('\\n', '\n').lower()
# Slices the long string into a workable piece (only contains useful data)
slice1 = formattedText[(formattedText.index("sheet") + 10): (formattedText.index("secondary") - 2)]
clean = re.sub('\n', " ", slice1)
clean2 = re.sub(' +', ' ', clean)

# Creating the PrimerData dictionary: the slice is split on ": " / newline
# and the pieces are read alternately as key, value, key, value, ...
primerDataSlice = clean[clean.index("molecular"): -1]
primerData = re.split(": |\n", primerDataSlice)
primerKeys = primerData[0::2]
primerValues = primerData[1::2]
primerDict = {"Primer Data": dict(zip(primerKeys, primerValues))}

# Generating the JSON array "Primer Data". The output file is opened only
# after the data has been built, so a parsing error cannot leave an empty
# PrimerData.json behind; UTF-8 is explicit because ensure_ascii=False may
# emit non-ASCII characters.
primerJSON = json.dumps(primerDict, ensure_ascii=False)
with open("PrimerData.json", 'w', encoding="utf-8") as file:
    file.write(primerJSON)

# Grabbing the date (this has just the date, so json will have to add date.)
# Raw string: same pattern, without invalid-escape warnings for \d, \/ and \-.
date = re.findall(r'(\d{2}[\/\- ](\d{2}|january|jan|february|feb|march|mar|april|apr|may|may|june|jun|july|jul|august|aug|september|sep|october|oct|november|nov|december|dec)[\/\- ]\d{2,4})', clean2)
Without input data it is difficult to give you working code. A minimal working example with input would help. As for JSON handling, python dictionaries can dump to json easily. See examples here.
https://docs.python-guide.org/scenarios/json/
Get a json string from a dictionary and write to a file. Figure out how to parse the text into a dictionary.
import json
# A plain dict whose keys are the field names and whose values are the data.
d = {"Date" : "21feb2019", "Sequence ID" : "lacz-rp", "Sequence 5'-3'" : "gat"}
# dumps() serialises the dict to a JSON-formatted string.
json_data = json.dumps(d)
print(json_data)
# Write that data to a file
So, I did figure this out, the problem was really just that because of the way my pre-processing was pulling all the data into a single list wasn't really that great of an idea considering that the keys for the dictionary never changed.
Here is the semi-finished result for making the Dictionary and JSON file.
# Pulls the individual fields out of `clean2` using fixed offsets measured
# from the position of a marker string.
# Collect the sequence name
name = clean2[clean2.index("Sequence") + 11: clean2.index("Sequence") + 19]
# Collecting Shipment info
ordered = input("Who placed this order? ")
received = input("Who is receiving this order? ")
# The pattern has two capture groups, so findall() returns a list of tuples
# (whole date, month part), not a single string.
dateOrder = re.findall(
r"(\d{2}[/\- ](\d{2}|January|Jan|February|Feb|March|Mar|April|Apr|May|June|Jun|July|Jul|August|Aug|September|Sep|October|Oct|November|Nov|December|Dec)[/\- ]\d{2,4})",
clean2)
# NOTE(review): `date` is presumably datetime.date; its import is not shown
# in this snippet -- TODO confirm.
dateReceived = date.today()
# NOTE(review): the start marker is "ref.No. " (trailing space) while the end
# marker is "ref.No." -- confirm both are meant to find the same position.
refNo = clean2[clean2.index("ref.No. ") + 8: clean2.index("ref.No.") + 17]
orderNo = clean2[clean2.index("Order No.") +
10: clean2.index("Order No.") + 18]
# Finding and grabbing the sequence data. Storing it and then finding the
# GC content and melting temp or TM
bases = int(clean2[clean2.index("bases") - 3:clean2.index("bases") - 1])
# NOTE(review): if clean2 is a str, this iterates single characters, so
# seqList holds every individual A/G/C/T character of the text -- TODO confirm.
seqList = [line for line in clean2 if re.match(r'^[AGCT]+$', line)]
sequence = "".join(i for i in seqList[:bases])
def gc_content(x):
    """Return the GC content of sequence ``x`` as a percentage.

    Counts the uppercase 'G' and 'C' characters of ``x`` and divides by the
    length of ``x`` itself, so the result does not depend on any module-level
    variable.

    Args:
        x: Sequence string (or any sized iterable of single characters).

    Returns:
        The percentage rounded to one decimal place; 0.0 for an empty
        sequence.
    """
    if not x:
        # Nothing to count; also avoids dividing by zero.
        return 0.0
    gc_count = sum(1 for base in x if base in ('G', 'C'))
    return round((gc_count / len(x)) * 100, 1)
# Computes the derived values and writes everything to "<name>.json".
gc = gc_content(sequence)
# NOTE(review): mt, mw, Seq and generic_dna are not defined in this snippet --
# presumably Biopython imports that are not shown; TODO confirm.
tm = mt.Tm_GC(sequence, Na=50)
moleWeight = round(mw(Seq(sequence, generic_dna)), 2)
dilWeight = float(clean2[clean2.index("ug/OD260:") +
10: clean2.index("ug/OD260:") + 14])
dilution = dilWeight * 10
# NOTE(review): the key "ug/0D260" below contains a zero where the marker
# "ug/OD260:" above has the letter O -- confirm which spelling is intended.
primerDict = {"Primer Data": {
"Sequence": sequence,
"Bases": bases,
"TM (50mM NaCl)": tm,
"% GC content": gc,
"Molecular weight": moleWeight,
"ug/0D260": dilWeight,
"Dilution volume (uL)": dilution
},
"Shipment Info": {
"Ref. No.": refNo,
"Order No.": orderNo,
"Ordered by": ordered,
"Date of Order": dateOrder,
"Received By": received,
"Date Received": str(dateReceived.strftime("%d-%b-%Y"))
}}
# Generating the JSON array "Primer Data"
# The output file is named after the sequence name collected earlier.
with open("".join(name) + ".json", 'w') as file:
primerJSON = json.dumps(primerDict, ensure_ascii=False)
file.write(primerJSON)

Failed processing pyformat-parameters; 'MySQLConverter' object has no attribute '_list_to_mysql'

I have a python script which everytime a wait_for_page call is made it writes the time it took to wait for the page to a database. The query is below:
# Inserts one row into page_load_times. The statement uses named pyformat
# placeholders and the values are passed separately in `params`.
conn = mysql.connector.connect(**config)
# NOTE: despite its name, `connect` is the cursor, not the connection.
connect = conn.cursor()
params = {'build': self.tc.tag, 'page': unicode(self), 'object_id': self.object_id, 'page_header':
self.page_header, 'interval': t.interval, 'timestamp': timestamp}
query = u'INSERT INTO page_load_times (build, page, object_id, page_header, elapsed_time, date_run) ' \
'VALUES (%(build)s, %(page)s, %(object_id)s, %(page_header)s, %(interval)s, %(timestamp)s)'
connect.execute(query, params)
conn.commit()
conn.close()
Occasionally, when this runs, I get an error which says:
"Failed processing pyformat-parameters; %s" % err)
ProgrammingError: Failed processing pyformat-parameters; 'MySQLConverter'
object has no attribute '_list_to_mysql'
I know what is causing this, just uncertain how to go about fixing it. The 'page': unicode(self) param occasionally gets a list as a result.
In an attempt to fix this, I tweaked the above script to extract the list into a string, with the following:
# format() always yields one unicode string here...
page_list = u'{}'.format(self)
# ...so join() iterates over that string character by character and puts
# "','" between every pair of characters.
page_results = "('%s')" % "','".join(page_list)
params = {'build': self.tc.tag, 'page': page_results, 'object_id': self.object_id, 'page_header':
self.page_header, 'interval': t.interval, 'timestamp': timestamp}
When I run this, the error I am getting now is that the data is too long for the field. I debug it, to find that my page results has each character parsed out individually looking like so:
u'(\\'A\\',\\'p\\',\\'p\\',\\'M\\',\\'a\\',\\'i\\',\\'n\\',\\'M\\',\\'e\\',\\'n\\',\\'u\\',\\':\\',\\' \\',\\'N\\',\\'o\\',\\'n\\',\\'e\\')'
So the solution was to do the following, which takes the page_header and, if it is an instance of list, turns that list into a string:
# Inserts one row into page_load_times, flattening a list-valued page_header
# into a single string first.
conn = mysql.connector.connect(**config)
try:
    # NOTE: despite its name, `connect` is the cursor, not the connection.
    connect = conn.cursor()
    page_list = u'{}'.format(self)
    # Test the RAW attribute: after format() the value is always a string,
    # so an isinstance(..., list) check on the formatted value can never be true.
    page_header = self.page_header
    if isinstance(page_header, list):
        # Join the items and keep at most the first 100 characters.
        page_header_list = u', '.join(
            u'{}'.format(item) for item in page_header)[0:100]
    else:
        page_header_list = u'{}'.format(page_header)
    params = {'build': self.tc.tag, 'page': page_list, 'object_id': self.object_id,
              'page_header': page_header_list, 'interval': t.interval, 'timestamp': timestamp}
    query = u'INSERT INTO page_load_times (build, page, object_id, page_header, elapsed_time, date_run) ' \
            'VALUES (%(build)s, %(page)s, %(object_id)s, %(page_header)s, %(interval)s, %(timestamp)s)'
    connect.execute(query, params)
    conn.commit()
finally:
    # Release the connection even when execute() or commit() raises.
    conn.close()
Thank you @DarthOpto, you pointed me in the right direction. I solved it by wrapping the variables in str():
# Convert both values with the built-in str() before binding them, so the
# connector always receives plain strings.
params = {'build': self.tc.tag, 'page': str(page_list), 'object_id': self.object_id,
          'page_header': str(page_header_list), 'interval': t.interval, 'timestamp': timestamp}