I created a script that generates a JSON file storing the list of my server URLs and API keys. Now I need to read those elements (URL and API key) so I can get all the users on each server. I'm stuck at the point where I need to iterate over all server URLs and assign each one to a variable.
Below is my sample code.
import sys
import json
import testlink
import xmlrpc.client
import requests
import xml.etree.ElementTree as ET
import openpyxl
from openpyxl import Workbook
requests.packages.urllib3.disable_warnings()
# Create a JSON file composed of TESTLINK_API_PYTHON_SERVER_URL and TESTLINK_API_PYTHON_DEVKEY
def serverJson():
    serverDictionary = {
        "servers": [
            {
                "server_name": "https://firstInstance/lib/api/xmlrpc/v1/xmlrpc.php",
                "devKey": "1234567890abcdef"
            },
            {
                "server_name": "https://secondInstance/lib/api/xmlrpc/v1/xmlrpc.php",
                "devKey": "0987654321fedcba"
            }
        ]
    }
    # Create the json file
    with open("server.json", "w") as server:
        json.dump(serverDictionary, server)
# Return TESTLINK_API_PYTHON_SERVER_URL and TESTLINK_API_PYTHON_DEVKEY
def serverList():
    serverJson()
    # Open the json file and load the JSON object as a dictionary
    with open('server.json') as server_file:
        data = json.load(server_file)
    # Iterate through the json list
    for servers in data['servers']:
        serverName = servers.get('server_name')
        devKey = servers.get('devKey')
        print(serverName, devKey)
    # Start Testlink-API-Call
    TESTLINK_API_PYTHON_SERVER_URL = str()
    TESTLINK_API_PYTHON_DEVKEY = str()
    tls = testlink.TestlinkAPIClient(TESTLINK_API_PYTHON_SERVER_URL, TESTLINK_API_PYTHON_DEVKEY)
    # If/else for each instance & devKey
    # First instance
    if TESTLINK_API_PYTHON_SERVER_URL == (firstServerURL) and TESTLINK_API_PYTHON_DEVKEY == (firstDevKey):
        print("----------User list for First Instance----------")
        tree = ET.parse('usersTLFirstInstance.xml')
        root = tree.getroot()
        for user in root.findall('user'):
            loginID = user.find('id').text
            for tl_first_user in tls.getUserByID(loginID):
                first_name = tl_first_user.get('firstName')
                print(loginID, first_name)
        print("----------Ending List for First Instance----------")
    # Second instance
    elif TESTLINK_API_PYTHON_SERVER_URL == (secondServerURL) and TESTLINK_API_PYTHON_DEVKEY == (secondDevKey):
        print("----------User list for Second Instance----------")
        tree = ET.parse('usersTLSecondInstance.xml')
        root = tree.getroot()
        for user in root.findall('user'):
            loginID = user.find('id').text
            for tl_second_user in tls.getUserByID(loginID):
                first_name = tl_second_user.get('firstName')
                print(loginID, first_name)
        print("----------Ending List for Second Instance----------")

serverList()
Here is the JSON file that gets created.
{
    "servers": [
        {
            "server_name": "https://firstInstance/lib/api/xmlrpc/v1/xmlrpc.php",
            "devKey": "1234567890abcdef"
        },
        {
            "server_name": "https://secondInstance/lib/api/xmlrpc/v1/xmlrpc.php",
            "devKey": "0987654321fedcba"
        }
    ]
}
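Since no answer is attached here, a minimal sketch of one way past the sticking point: bind the URL and dev key inside the loop and build one API client per JSON entry, so no if/elif chain over instances is needed. The xml_file key is a hypothetical addition to each JSON entry so every server can carry the name of its own user-ID file; everything else mirrors the calls already used above.
import json
import xml.etree.ElementTree as ET
import testlink

def listAllUsers():
    with open('server.json') as server_file:
        data = json.load(server_file)
    for entry in data['servers']:
        # Each iteration binds one URL/devKey pair from the JSON file
        tls = testlink.TestlinkAPIClient(entry['server_name'], entry['devKey'])
        print(f"----------User list for {entry['server_name']}----------")
        # 'xml_file' is a hypothetical extra key naming this server's user-ID file
        tree = ET.parse(entry['xml_file'])
        for user in tree.getroot().findall('user'):
            loginID = user.find('id').text
            for tl_user in tls.getUserByID(loginID):
                print(loginID, tl_user.get('firstName'))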
Related
I want to read nested data from a JSON file. I have created a .proto file based on the JSON, but I am still not able to read the nested data.
nested.proto --> compiled using protoc --python_out=$PWD nested.proto
syntax = "proto2";

message Employee {
    required int32 EMPLOYEE_ID = 1;
    message ListItems {
        required string FULLADDRESS = 1;
    }
    repeated ListItems EMPLOYEE_ADDRESS = 2;
}
nested.json
{
    "EMPLOYEE_ID": 5044,
    "EMPLOYEE_ADDRESS": [
        {
            "FULLADDRESS": "Suite 762"
        }
    ]
}
parse.py
#!/usr/bin/env python3
import json
from google.protobuf.json_format import Parse
import nested_pb2 as np

input_file = "nested.json"

if __name__ == "__main__":
    # reading json file
    f = open(input_file, 'rb')
    content = json.load(f)
    # initialize emp_table here
    emp_table = np.Employee()
    employee = Parse(json.dumps(content), emp_table, True)
    print(employee.EMPLOYEE_ID)  # output: 5044
    emp_table = np.Employee().ListItems()
    items = Parse(json.dumps(content), emp_table, True)
    print(items.FULLADDRESS)  # output: NO OUTPUT (WHY?)
A couple of things:
The field's type is ListItems, but its name is EMPLOYEE_ADDRESS.
Python is awkward (!) with repeated fields.
You're writing more code than you need.
I recommend adhering to the style guide if you can.
Try:
#!/usr/bin/env python3
import json
from google.protobuf.json_format import Parse
import nested_pb2 as np

input_file = "nested.json"

if __name__ == "__main__":
    # read the json file
    with open(input_file, 'rb') as f:
        content = json.load(f)
    # initialize emp_table here
    emp_table = np.Employee()
    employee = Parse(json.dumps(content), emp_table, True)
    print(employee.EMPLOYEE_ID)  # output: 5044
    # Iterate the repeated field by its name (EMPLOYEE_ADDRESS), not its type
    for item in employee.EMPLOYEE_ADDRESS:
        print(item)
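If you only want the address string itself, indexing the repeated field should also work, e.g. print(employee.EMPLOYEE_ADDRESS[0].FULLADDRESS) at the end of the script above (a small assumption on my part; repeated message fields behave like sequences, so they support indexing as well as iteration).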
I have a Cloud Function that copies a file from a Standard bucket to a Nearline bucket. I also tried saving the file by opening it as a pandas dataframe and writing it out as a Dask dataframe. Both approaches worked, but every time I try to download the file through the GUI I get the XML error message stated below. Does anyone know why this is happening? How can I prevent it?
This XML file does not appear to have any style information associated with it
import base64
import json
from google.cloud import storage
import dask.dataframe as dd
import pandas as pd

def hello_pubsub(event, context):
    """Triggered from a message on a Cloud Pub/Sub topic.
    Args:
        event (dict): Event payload.
        context (google.cloud.functions.Context): Metadata for the event.
    """
    print('here')
    print(event)
    pubsub_message = base64.b64decode(event['data']).decode('utf-8')
    payload = json.loads(pubsub_message)
    bucket_name = payload['data']['bucket_name']
    print(bucket_name)
    blob_name = payload['data']['file_name']
    print(blob_name)
    destination_bucket_name = 'infobip-email-uploaded'
    #destination_blob_name = blob_name[0:10] + '.csv'
    destination_blob_name = 'ddf-*.csv'
    df = pd.read_excel('gs://' + bucket_name + '/' + blob_name, sheet_name='Data', engine='xlrd')
    print('excel has been read')
    ddf = dd.from_pandas(df, npartitions=1, sort=True)
    print('dataframe has been transformed into dask')
    path = 'gs://' + destination_bucket_name + '/' + destination_blob_name
    print('path is')
    print(path)
    ddf.to_csv(path, index=False, sep=',', header=False)
    destination_blob_name = blob_name[0:10] + '.xlsx'
    copy_blob(bucket_name, blob_name, destination_bucket_name, destination_blob_name)
    print('File has been successfully copied')
    delete_blob(bucket_name, blob_name)
    print('File has been successfully deleted')
    return '200'

def copy_blob(bucket_name, blob_name, destination_bucket_name, destination_blob_name):
    """Copies a blob from one bucket to another with a new name."""
    # bucket_name = "your-bucket-name"
    # blob_name = "your-object-name"
    # destination_bucket_name = "destination-bucket-name"
    # destination_blob_name = "destination-object-name"
    storage_client = storage.Client()
    source_bucket = storage_client.bucket(bucket_name)
    source_blob = source_bucket.blob(blob_name)
    destination_bucket = storage_client.bucket(destination_bucket_name)
    blob_copy = source_bucket.copy_blob(
        source_blob, destination_bucket, destination_blob_name
    )
    print(
        "Blob {} in bucket {} copied to blob {} in bucket {}.".format(
            source_blob.name,
            source_bucket.name,
            blob_copy.name,
            destination_bucket.name,
        )
    )

def delete_blob(bucket_name, blob_name):
    """Deletes a blob from the bucket."""
    # bucket_name = "your-bucket-name"
    # blob_name = "your-object-name"
    storage_client = storage.Client()
    bucket = storage_client.bucket(bucket_name)
    blob = bucket.blob(blob_name)
    blob.delete()
    print("Blob {} deleted.".format(blob_name))
Good afternoon. I'm trying to find the top 10 IPs in access.log (the standard Apache server log). Here is the code:
import argparse
import json
import re
from collections import defaultdict, Counter

parser = argparse.ArgumentParser(description='parser script')
parser.add_argument('-f', dest='logfile', action='store', default='access.log')
args = parser.parse_args()

regul_ip = (r"^(?P<ips>.*?)")
regul_method = (r"\"(?P<request_method>GET|POST|PUT|DELETE|HEAD)")

def req_by_method():
    dict_ip = defaultdict(lambda: {"GET": 0, "POST": 0, "PUT": 0, "DELETE": 0, "HEAD": 0})
    with open(args.logfile) as file:
        for index, line in enumerate(file.readlines()):
            try:
                ip = re.search(regul_ip, line).group()
                method = re.search(regul_method, line).groups()[0]
                return Counter(dict_ip).most_common(10)
            except AttributeError:
                pass
            dict_ip[ip][method] += 1
    print(json.dumps(dict_ip, indent=4))
    with open("final_log.json", "w") as jsonfile:
        json.dump(dict_ip, jsonfile, indent=5)
When the code is executed, I only get: []
How can I fix this code to make it work?
I also need to write to the final JSON file a set of lines with "ip", "method", "status code", "url" and the duration of the request.
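No answer is attached here, but a minimal sketch of one possible fix (my own reading of the code, not a confirmed solution): the return fires on the very first line, before anything has been counted, and the non-greedy ^(?P<ips>.*?) happily matches an empty string, so an explicit IP pattern is needed. Counting per IP and returning only after the whole file is read:
import json
import re
from collections import Counter

# Explicit IPv4 pattern; the original non-greedy ".*?" matched the empty string
regul_ip = re.compile(r"^(?P<ip>\d{1,3}(?:\.\d{1,3}){3})")
regul_method = re.compile(r"\"(?P<method>GET|POST|PUT|DELETE|HEAD)")

def top_ips(logfile="access.log"):
    counts = Counter()
    with open(logfile) as f:
        for line in f:
            ip_match = regul_ip.search(line)
            method_match = regul_method.search(line)
            if ip_match and method_match:
                counts[ip_match.group("ip")] += 1
    # Return only after the whole file has been counted
    return counts.most_common(10)

print(json.dumps(top_ips(), indent=4))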
I have some JSON files that I want to use as input. Consider this folder layout:
mainFolder --> Folder 1 : 10 JSON files (req)
           --> Folder 2 : 10 JSON files (req)
From these folders I want to create a structure where each directory is a testCase and each file is a testStep.
Here's my code:
import com.eviware.soapui.impl.wsdl.teststeps.registry.GroovyScriptStepFactory
import com.eviware.soapui.support.UISupport;
import com.eviware.soapui.impl.wsdl.teststeps.registry.RestRequestStepFactory
import com.eviware.soapui.config.TestStepConfig
import com.eviware.soapui.impl.rest.*;

def myTestCase = context.testCase
log.info myTestCase
def projectPath = Path
def endPoint = "anEndPoint";

def addTestStep(operation, requestFile, testCase, projectPath, endpoint){
    def usageId = requestFile.name.replace("_request.json", "")
    def projectPathTest = projectPath + "SPecificPath";
    def testStepName = usageId;
    def iface = testCase.testSuite.project.getInterfaceList()[0];
    def operationName = operation;
    def op = iface.operations[operationName];
    def config = com.eviware.soapui.impl.wsdl.teststeps.registry.RestRequestStepFactory.createConfig( op, testStepName );
    def newTestStep = testCase.addTestStep( config );
    newTestStep.getTestRequest().setRequestContent(requestFile.text)
    newTestStep.httpRequest.endpoint = endpoint
}

if ( com.eviware.soapui.support.UISupport.confirm("Reconstruct ?", "Confirm") ){
    testSuite.getTestCaseList().each{ testCase -> testSuite.removeTestCase(testCase) }
    new File(projectPathTest).eachDir{ dir ->
        operation = dir.name
        def RestTestCase = testSuite.addNewTestCase(operation)
        RestTestCase.setFailOnError(false)
        dir.eachFileMatch(~/.*_request\.json/){ file ->
            addTestStep(operation, file, RestTestCase, projectPath, endPoint)
        }
    }
}
I have checked this many times, across many pages and forums, and the form and algorithm seem correct. I do manage to create the test cases with the names of the folders and to read the request JSON files, but I fail to create the test steps. I'm fairly sure it's either the config or the interface/operation lookup that makes it fail:
def config = com.eviware.soapui.impl.wsdl.teststeps.registry.RestRequestStepFactory.createConfig( op, testStepName );
Any help please?
I'm trying to scrape a webpage, extract data, and store it all in a CSV file. Before adding the ScrapeCallback class and calling it, everything worked fine. However, after adding the new class, nothing except the headers is stored in the CSV file. Can anyone help me figure out the problem?
import re
import urlparse
import urllib2
import time
from datetime import datetime
import robotparser
import Queue
import csv
import lxml.html

class ScrapeCallback:
    # extract and store all data in a csv file
    def __init__(self):
        self.writer = csv.writer(open('countries.csv', 'w'))
        self.fields = ('area', 'population', 'iso', 'country', 'capital', 'continent', 'tld', 'currency_code', 'currency_name', 'phone', 'postal_code_format', 'postal_code_regex', 'languages', 'neighbours')
        self.writer.writerow(self.fields)

    def __call__(self, url, html):
        if re.search('/view/', url):
            tree = lxml.html.fromstring(html)
            row = []
            for field in self.fields:
                row.append(tree.cssselect('table > tr#places_{}__row > td.w2p_fw'.format(field))[0].text_content())
            print row
            self.writer.writerow(row)
def link_crawler(seed_url, link_regex=None, delay=5, max_depth=-1, max_urls=-1, headers=None, user_agent='wswp', proxy=None, num_retries=1, scrape_callback=None):
    """Crawl from the given seed URL following links matched by link_regex
    """
    # the queue of URL's that still need to be crawled
    crawl_queue = [seed_url]
    # the URL's that have been seen and at what depth
    seen = {seed_url: 0}
    # track how many URL's have been downloaded
    num_urls = 0
    rp = get_robots(seed_url)
    throttle = Throttle(delay)
    headers = headers or {}
    if user_agent:
        headers['User-agent'] = user_agent

    while crawl_queue:
        url = crawl_queue.pop()
        depth = seen[url]
        # check url passes robots.txt restrictions
        if rp.can_fetch(user_agent, url):
            throttle.wait(url)
            html = download(url, headers, proxy=proxy, num_retries=num_retries)
            links = []
            if scrape_callback:
                links.extend(scrape_callback(url, html) or [])
            if depth != max_depth:
                # can still crawl further
                if link_regex:
                    # filter for links matching our regular expression
                    links.extend(link for link in get_links(html) if re.match(link_regex, link))
                for link in links:
                    link = normalize(seed_url, link)
                    # check whether already crawled this link
                    if link not in seen:
                        seen[link] = depth + 1
                        # check link is within same domain
                        if same_domain(seed_url, link):
                            # success! add this new link to queue
                            crawl_queue.append(link)
            # check whether have reached downloaded maximum
            num_urls += 1
            if num_urls == max_urls:
                break
        else:
            print 'Blocked by robots.txt:', url
class Throttle:
    """Throttle downloading by sleeping between requests to same domain
    """
    def __init__(self, delay):
        # amount of delay between downloads for each domain
        self.delay = delay
        # timestamp of when a domain was last accessed
        self.domains = {}

    def wait(self, url):
        """Delay if have accessed this domain recently
        """
        domain = urlparse.urlsplit(url).netloc
        last_accessed = self.domains.get(domain)
        if self.delay > 0 and last_accessed is not None:
            sleep_secs = self.delay - (datetime.now() - last_accessed).seconds
            if sleep_secs > 0:
                time.sleep(sleep_secs)
        self.domains[domain] = datetime.now()
def download(url, headers, proxy, num_retries, data=None):
    print 'Downloading:', url
    request = urllib2.Request(url, data, headers)
    opener = urllib2.build_opener()
    if proxy:
        proxy_params = {urlparse.urlparse(url).scheme: proxy}
        opener.add_handler(urllib2.ProxyHandler(proxy_params))
    try:
        response = opener.open(request)
        html = response.read()
        code = response.code
    except urllib2.URLError as e:
        print 'Download error:', e.reason
        html = ''
        if hasattr(e, 'code'):
            code = e.code
            if num_retries > 0 and 500 <= code < 600:
                # retry 5XX HTTP errors
                html = download(url, headers, proxy, num_retries-1, data)
        else:
            code = None
    return html
def normalize(seed_url, link):
    """Normalize this URL by removing hash and adding domain
    """
    link, _ = urlparse.urldefrag(link)  # remove hash to avoid duplicates
    return urlparse.urljoin(seed_url, link)

def same_domain(url1, url2):
    """Return True if both URL's belong to same domain
    """
    return urlparse.urlparse(url1).netloc == urlparse.urlparse(url2).netloc

def get_robots(url):
    """Initialize robots parser for this domain
    """
    rp = robotparser.RobotFileParser()
    rp.set_url(urlparse.urljoin(url, '/robots.txt'))
    rp.read()
    return rp

def get_links(html):
    """Return a list of links from html
    """
    # a regular expression to extract all links from the webpage
    webpage_regex = re.compile('<a[^>]+href=["\'](.*?)["\']', re.IGNORECASE)
    # list of all links from the webpage
    return webpage_regex.findall(html)
if __name__ == '__main__':
    # link_crawler('http://example.webscraping.com', '/(index|view)', delay=0, num_retries=1, user_agent='BadCrawler')
    # link_crawler('http://example.webscraping.com', '/(index|view)', delay=0, num_retries=1, max_depth=1, user_agent='GoodCrawler')
    link_crawler('http://example.webscraping.com', '/(index|view)', max_depth=2, scrape_callback=ScrapeCallback())
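No accepted answer is shown here, but one likely culprit (an assumption on my part, not a confirmed diagnosis): the csv.writer in ScrapeCallback wraps a file handle that is never flushed or closed, so buffered data rows can be lost while the header, written first, still makes it out. A minimal sketch of a more defensive callback that keeps the handle and flushes after each row:
import csv
import re
import lxml.html

class ScrapeCallback:
    def __init__(self):
        # keep a reference to the file so it can be flushed explicitly
        self.file = open('countries.csv', 'w')
        self.writer = csv.writer(self.file)
        self.fields = ('area', 'population', 'iso', 'country', 'capital',
                       'continent', 'tld', 'currency_code', 'currency_name',
                       'phone', 'postal_code_format', 'postal_code_regex',
                       'languages', 'neighbours')
        self.writer.writerow(self.fields)

    def __call__(self, url, html):
        if re.search('/view/', url):
            tree = lxml.html.fromstring(html)
            row = [tree.cssselect('table > tr#places_{}__row > td.w2p_fw'.format(field))[0].text_content()
                   for field in self.fields]
            self.writer.writerow(row)
            self.file.flush()  # push each row to disk immediately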