Trying to parse access.log - json

Good afternoon, I'm trying to find the top 10 ip in access.log (standard log of the Apache server).
There is a code like this:
import argparse
import json
import re
from collections import defaultdict, Counter
parser = argparse.ArgumentParser(description='parser script')
parser.add_argument('-f', dest='logfile', action='store', default='access.log')
args = parser.parse_args()
regul_ip = (r"^(?P<ips>.*?)")
regul_method = (r"\"(?P<request_method>GET|POST|PUT|DELETE|HEAD)")
def req_by_method():
dict_ip = defaultdict(lambda: {"GET": 0, "POST": 0, "PUT": 0, "DELETE": 0, "HEAD": 0})
with open(args.logfile) as file:
for index, line in enumerate(file.readlines()):
try:
ip = re.search(regul_ip, line).group()
method = re.search(regul_method, line).groups()[0]
return Counter(dict_ip).most_common(10)
except AttributeError:
pass
dict_ip[ip][method] += 1
print(json.dumps(dict_ip, indent=4))
with open("final_log.json", "w") as jsonfile:
json.dump(dict_ip, jsonfile, indent=5)
When the code is executed, I only get: []
How can I fix this code to make it work?
I also need to output to the final json file a set of such lines: "ip", "method", "status code", "url" and the duration of the request

Related

Parsing JSON element (Iterate over list of elements)

I created a script where it creates a JSON file where the list of my server URLs and API keys are stored. Now, I need to get those element (URL & API keys) in order for me to get all the users on each server. I'm stuck at the point where I need to iterate on all server URLs and declare them as a variable.
Below is my sample code.
import sys
import json
import testlink
import xmlrpc.client
import requests
import xml.etree.ElementTree as ET
import openpyxl
from openpyxl import Workbook
requests.packages.urllib3.disable_warnings()
# Create a Json file that compose of TESTLINK_API_PYTHON_SERVER_URL and TESTLINK_API_PYTHON_DEVKEY
def serverJson():
serverDictionary = {
"servers": [
{
"server_name": "https://firstInstance/lib/api/xmlrpc/v1/xmlrpc.php",
"devKey": "1234567890abcdef"
},
{
"server_name": "https://secondInstance/lib/api/xmlrpc/v1/xmlrpc.php",
"devKey": "0987654321fedcba"
}
]
}
# Create json file
with open("server.json", "w") as server:
json.dump(serverDictionary, server)
# Return TESTLINK_API_PYTHON_SERVER_URL and TESTLINK_API_PYTHON_DEVKEY
def serverList():
serverJson()
# Open json file
server_file = open('server.json')
# Return JSON Object as dictionary
data = json.load(server_file)
# Iterating through the json list
for servers in data['servers']:
serverName = servers.get('server_name')
devKey = servers.get('devKey')
print(serverName, devKey)
# Start Testlink-API-Call
TESTLINK_API_PYTHON_SERVER_URL = str()
TESTLINK_API_PYTHON_DEVKEY = str()
tls = testlink.TestlinkAPIClient(TESTLINK_API_PYTHON_SERVER_URL, TESTLINK_API_PYTHON_DEVKEY)
# IF Else to each instance & devKey
# First instance
if TESTLINK_API_PYTHON_SERVER_URL == (firstServerURL) and TESTLINK_API_PYTHON_DEVKEY == (firstDevKey):
print("----------User list for First Instance----------")
tree = ET.parse('usersTLFirstInstance.xml')
root = tree.getroot()
for user in root.findall('user'):
loginID = user.find('id').text
for tl_first_user in tls.getUserByID(loginID):
first_name = tl_first_user.get('firstName')
print(loginID, first_name)
print("----------Ending List for First Instance----------")
# Second instance
elif TESTLINK_API_PYTHON_SERVER_URL == (secondServerURL) and TESTLINK_API_PYTHON_DEVKEY == (secondDevKey):
print("----------User list for Second Instance----------")
tree = ET.parse('usersTLSecondInstance.xml')
root = tree.getroot()
for user in root.findall('user'):
loginID = user.find('id').text
for tl_second_user in tls.getUserByID(loginID):
first_name = tl_second_user.get('firstName')
print(loginID, first_name)
print("----------Ending List for Second Instance----------")
serverList()
Here is my JSON File that I created.
{
"servers": [
{
"server_name": "https://firstInstance/lib/api/xmlrpc/v1/xmlrpc.php",
"devKey": "1234567890abcdef"
},
{
"server_name": "https://secondInstance/lib/api/xmlrpc/v1/xmlrpc.php",
"devKey": "0987654321fedcba"
}
]
}

Download PDFs from multiple JSON URLs using Python

I have been tasked to create a method to download multiple PDFs from URLs included in JSON files. Probably 1 URL per JSON file, with approx 500k JSON files to process in any one batch.
Here's a sample of the JSON file:
{
"from": null,
"id": "sfm_c4kjatol7u8psvqfati0",
"imb_code": "897714123456789",
"mail_date": null,
"mail_type": "usps_first_class",
"object": "self_mailer",
"press_proof": "https://lob-assets.com/sid-self_mailers/sfm_c4kjatol7u8psvqfati0.pdf?version=v1&expires=1635274615&signature=AZlb0MSzZPuCjtKFkXRr_OoHzDzEy23UqzmKFWs5bycKCEcIyfe2od58zHzfP1a-iW5d9azFYUT1PnosqKcvBg",
"size": "11x9_bifold",
"target_delivery_date": null,
"to": {
"address_city": "SAN FRANCISCO",
"address_country": "UNITED STATES",
"address_line1": "185 BERRY ST STE 6100",
"address_line2": null,
"address_state": "CA",
"address_zip": "94107-1741",
"company": "Name.COM",
"name": "EMILE ILES"
}
}
The JSON file is converted to CSV and the URL is downloaded.
Here's what I have been trying to use but it is not working. What am I missing?
Import urllib.request, json, requests, os, csvkit
from itertools import islice
from pathlib import Path
path = Path("/Users/MyComputer/Desktop/self_mailers")
paths = [i.path for i in islice(os.scandir(path), 100)]
in2csv data.json > data.csv
with open('*.json', 'r') as f:
urls_dict = json.load(f)
urls_dict = urls_dict[0]
itr = iter(urls_dict)
len(list(itr))
f.write(r.pdf)
Why are you converting your JSON to a CSV?
Btw, if you are unsure of where are the urls in the jsons, I would do this:
import os
import json
from rethreader import Rethreader
from urllib.parse import urlparse
from urllib.request import urlretrieve
def download_pdf(url):
# use urlparse to find the pdf name
filename = urlparse(url).path.rsplit('/')[-1]
urlretrieve(url, filename)
# use multi-threading for faster downloads
downloader = Rethreader(download_pdf).start()
def verify_url(value):
if not isinstance(value, str):
# if the value is not a string, it's neither an url
return False
try:
parsed_url = urlparse(value)
except AttributeError:
# value cannot be parsed as url
return False
if not (parsed_url.scheme and parsed_url.netloc and parsed_url.path):
# value cannot be an url because it does not have the right scheme
return False
return True
def parse_data(data):
for value in data.values():
if verify_url(value):
downloader.add(value)
for file in os.listdir():
with open(file) as fp:
try:
json_data = json.load(fp)
except (json.JSONDecodeError, UnicodeDecodeError):
# this file is not a json; let's skip to the next one
continue
parse_data(json_data)
# quit the downloader after downloading the files
downloader.quit()
If you know in what possible keys can be the urls, I would do as this:
# The other parts same as before
def parse_data(data):
for key in ['possible_key', 'another_possible_key']:
if key in data and verify_url(data[key]):
downloader.add(data[key])

After updating json file with groovy, the file data contains extra curly brackets and "content" object

This is sort of data I got in my json file
{"globals":{"code":"1111","country_code":"8888","hits":80,"extra_hit":1,"keep_money":true},"time_window":{"from":"2020.12.14 08:40:00","to":"2020.12.14 08:45:00"},"car":{"have":"nope"}}
After I run it through this groovy code in jmeter:
import groovy.json.JsonSlurper
import groovy.json.JsonBuilder
import groovy.json.JsonOutput
def jsonSlurper = new JsonSlurper().parse(new File("C:/pathToFile/test.json"))
log.info(jsonSlurper.toString())
jsonSlurper.globals.hits = 70
jsonSlurper.time_window.from = "2020.12.14 08:42:00"
jsonSlurper.time_window.to = "2020.12.14 08:48:00"
def builder = new JsonBuilder(jsonSlurper)
log.info(builder.toString())
def json_str = JsonOutput.toJson(builder)
def json_beauty = JsonOutput.prettyPrint(json_str)
log.info(json_beauty.toString())
File file = new File("C:/pathToFile/test.json")
file.write(json_beauty)
the json file is updated, but all data are wrapped in new object "content"
"content": {
"globals": {
"code":"1111",
"country_code": "8888",
"hits": 70,
"extra_hit": 1,
"keep_money": true
},
"time_window": {
"from": "2020.12.14 08:42:00",
"to": "2020.12.14 08:48:00"
},
"car": {
"have": "nope"
}
}
}
How to avoid that wrapping into "content" object?
Copying and pasting the code from Internet without having any idea what it is doing is not the best way to proceed, at some point you will end up running a Barmin's patch
My expectation is that you're looking for JsonBuilder.toPrettyString() function so basically everything which goes after this line:
def builder = new JsonBuilder(jsonSlurper)
can be replaced with:
new File("C:/pathToFile/test.json").text = builder.toPrettyString()
More information:
Apache Groovy: Parsing and producing JSON
Apache Groovy - Why and How You Should Use It

How to read the text in excel cell and replace with someother value in json output using python?

My python code reads the excel sheet and converts it into a json file output. I have a column in the excel sheet, where the values are either "Planned" or "Unplanned".
1)In the json output, I want the Planned to be replaced with "1" and Unplanned to be replaced with "2" without changing anything in the excel file.
2)In the output I dont want "data" to appear.
3)In the excel, my Start time column value is like this "2018-11-16 08:00:00". I want the output to be "2018-11-16T08:00:00Z". Currently i am getting some garbage value.
Below is my code.
import xlrd, json, time, pytz, requests
from os import sys
from datetime import datetime, timedelta
from collections import OrderedDict
def json_from_excel():
excel_file = 'test.xlsx'
jsonfile = open('ExceltoJSON.json', 'w')
data = []
datestr = str(datetime.now().date())
loaddata = OrderedDict()
workbook = xlrd.open_workbook(excel_file)
worksheet = workbook.sheet_by_name('OMS-GX Data Extraction')
sheet = workbook.sheet_by_index(0)
for j in range(0, 6):
for i in range(1, 40):
temp = {}
temp["requestedStart"] = (sheet.cell_value(i,0)) #Start Time
temp["requestedComplete"] = (sheet.cell_value(i, 1)) #End Time
temp["location"] = (sheet.cell_value(i, 3)) #Station
temp["equipment"] = (sheet.cell_value(i, 4)) #Device Name
temp["switchOrderTypeID"] = (sheet.cell_value(i, 5)) #Outage Type
data.append(temp)
loaddata['data'] = data
json.dump(loaddata, jsonfile, indent=3, sort_keys=False)
jsonfile.write('\n')
return loaddata
if __name__ == '__main__':
data = json_from_excel()
Below is my sample output:
{
"data": [
{
"requestedStart": testtime,
"requestedComplete": testtime,
"location": "testlocation",
"equipment": "testequipment",
"switchOrderTypeID": "Planned"
},
{
"requestedStart": testtime,
"requestedComplete": testtime,
"location": "testlocation",
"equipment": "testequipment",
"switchOrderTypeID": "Unplanned"
}
]
}
Answer to the 1st question:
You may use conditional assignment.
temp["switchOrderTypeID"] = (1 if sheet.cell_value(i, 5) == "Planned" else 0)
Answer to the 2nd question:
Use loaddata = data which will be an array of the jsons without data as json key.
Answer to 3rd question:
from dateutil.parser import parse
t = "2018-11-16 08:00:00"
parse(t).strftime("%Y-%m-%dT%H:%M:%SZ")

Working with JSON and Django

I am new to Python and Django. I am an IT professional that deploys software that monitors computers. The api outputs to JSON. I want to create a Django app that reads the api and outputs the data to an html page. Where do I get started? I think the idea is to write the JSON feed to a Django model. Any help/advice is greatly appreciated.
Here's a simple single file to extract the JSON data:
import urllib2
import json
def printResults(data):
theJSON = json.loads(data)
for i in theJSON[""]
def main():
urlData = ""
webUrl = urllib2.urlopen(urlData)
if (webUrl.getcode() == 200):
data = webUrl.read()
printResults(data)
else:
print "Received error"
if __name__ == '__main__':
main()
If you have an URL returning a json as response, you could try this:
import requests
import json
url = 'http://....' # Your api url
response = requests.get(url)
json_response = response.json()
Now json_response is a list containing dicts. Let's suppose you have this structure:
[
{
'code': ABC,
'avg': 14.5,
'max': 30
},
{
'code': XYZ,
'avg': 11.6,
'max': 21
},
...
]
You can iterate over the list and take every dict into a model.
from yourmodels import CurrentModel
...
for obj in json_response:
cm = CurrentModel()
cm.avg = obj['avg']
cm.max = obj['max']
cm.code = obj['code']
cm.save()
Or you could use a bulk method, but keep in mind that bulk_create does not trigger save method.