Making predictions with Azure Machine Learning on new data that contains headers (like a pd.DataFrame), sent as JSON

My question is somewhat related to https://learn.microsoft.com/en-us/answers/questions/217305/data-input-format-call-the-service-for-azure-ml-ti.html - however, the provided solution does not seem to work.
I am constructing a simple model on the heart-disease dataset, but I wrap it in a Pipeline because I use some featurization steps (scaling, encoding, etc.). The full script is below:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
import joblib
import pickle
# data input
df = pd.read_csv('heart.csv')
# numerical variables
num_cols = ['age',
            'trestbps',
            'chol',
            'thalach',
            'oldpeak']
# categorical variables
cat_cols = ['sex',
            'cp',
            'fbs',
            'restecg',
            'exang',
            'slope',
            'ca',
            'thal']
# changing format of the categorical variables
df[cat_cols] = df[cat_cols].apply(lambda x: x.astype('object'))
# target variable
y = df['target']
# features
X = df.drop(['target'], axis=1)
# data split:
# random seed
np.random.seed(42)
# splitting the data
X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    test_size=0.2,
                                                    stratify=y)
# double check
X_train.shape, X_test.shape, y_train.shape, y_test.shape
# pipeline for numerical data
num_preprocessing = Pipeline([('num_imputer', SimpleImputer(strategy='mean')),  # imputing with mean
                              ('minmaxscaler', MinMaxScaler())])  # scaling
# pipeline for categorical data
cat_preprocessing = Pipeline([('cat_imputer', SimpleImputer(strategy='constant', fill_value='missing')),  # filling missing values
                              ('onehot', OneHotEncoder(drop='first', handle_unknown='error'))])  # One Hot Encoding
# preprocessor - combining pipelines
preprocessor = ColumnTransformer([
    ('categorical', cat_preprocessing, cat_cols),
    ('numerical', num_preprocessing, num_cols)
])
# initial model parameters
log_ini_params = {'penalty': 'l2',
                  'tol': 0.0073559740277086005,
                  'C': 1.1592424247511928,
                  'fit_intercept': True,
                  'solver': 'liblinear'}
# model - Pipeline
log_clf = Pipeline([('preprocessor', preprocessor),
                    ('clf', LogisticRegression(**log_ini_params))])
log_clf.fit(X_train, y_train)
# dumping the model
f = 'model/log.pkl'
with open(f, 'wb') as file:
    pickle.dump(log_clf, file)
# loading it
loaded_model = joblib.load(f)
# double check on a single datapoint
new_data = pd.DataFrame({'age': 71,
                         'sex': 0,
                         'cp': 0,
                         'trestbps': 112,
                         'chol': 203,
                         'fbs': 0,
                         'restecg': 1,
                         'thalach': 185,
                         'exang': 0,
                         'oldpeak': 0.1,
                         'slope': 2,
                         'ca': 0,
                         'thal': 2}, index=[0])
loaded_model.predict(new_data)
...and it works just fine. Then I deploy the model to an Azure web service using these steps:
I create the score.py file:
import joblib
from azureml.core.model import Model
import json
def init():
    global model
    model_path = Model.get_model_path('log')  # logistic
    print('Model Path is ', model_path)
    model = joblib.load(model_path)

def run(data):
    try:
        data = json.loads(data)
        result = model.predict(data['data'])
        # any data type, as long as it is JSON serializable.
        return {'data': result.tolist(), 'message': 'Successfully classified heart diseases'}
    except Exception as e:
        error = str(e)
        return {'data': error, 'message': 'Failed to classify heart diseases'}
I deploy the model:
from azureml.core import Workspace
from azureml.core.webservice import AciWebservice
from azureml.core.webservice import Webservice
from azureml.core.model import InferenceConfig
from azureml.core.environment import Environment
from azureml.core import Workspace
from azureml.core.model import Model
from azureml.core.conda_dependencies import CondaDependencies
ws = Workspace.from_config()
model = Model.register(workspace=ws,
                       model_path='model/log.pkl',
                       model_name='log',
                       tags={'version': '1'},
                       description='Heart disease classification',
                       )
# to install required packages
env = Environment('env')
cd = CondaDependencies.create(pip_packages=['pandas==1.1.5', 'azureml-defaults','joblib==0.17.0'], conda_packages = ['scikit-learn==0.23.2'])
env.python.conda_dependencies = cd
# Register environment to re-use later
env.register(workspace = ws)
print('Registered Environment')
myenv = Environment.get(workspace=ws, name='env')
myenv.save_to_directory('./environ', overwrite=True)
aciconfig = AciWebservice.deploy_configuration(
    cpu_cores=1,
    memory_gb=1,
    tags={'data': 'heart disease classifier'},
    description='Classification of heart diseases',
)
inference_config = InferenceConfig(entry_script='score.py', environment=myenv)
service = Model.deploy(workspace=ws,
                       name='hd-model-log',
                       models=[model],
                       inference_config=inference_config,
                       deployment_config=aciconfig,
                       overwrite=True)
service.wait_for_deployment(show_output=True)
url = service.scoring_uri
print(url)
The deployment is fine:
Succeeded
ACI service creation operation finished, operation "Succeeded"
But I cannot make any predictions with the new data. I try to use:
import pandas as pd
new_data = pd.DataFrame([[71, 0, 0, 112, 203, 0, 1, 185, 0, 0.1, 2, 0, 2],
                         [80, 0, 0, 115, 203, 0, 1, 185, 0, 0.1, 2, 0, 0]],
                        columns=['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalach', 'exang', 'oldpeak', 'slope', 'ca', 'thal'])
Following the answer from this topic (https://learn.microsoft.com/en-us/answers/questions/217305/data-input-format-call-the-service-for-azure-ml-ti.html) I transform the data:
test_sample = json.dumps({'data': new_data.to_dict(orient='records')})
And try to make some predictions:
import json
import requests
data = test_sample
headers = {'Content-Type':'application/json'}
r = requests.post(url, data=data, headers = headers)
print(r.status_code)
print(r.json())
However, I encounter an error:
200
{'data': "Expected 2D array, got 1D array instead:\narray=[{'age': 71, 'sex': 0, 'cp': 0, 'trestbps': 112, 'chol': 203, 'fbs': 0, 'restecg': 1, 'thalach': 185, 'exang': 0, 'oldpeak': 0.1, 'slope': 2, 'ca': 0, 'thal': 2}\n {'age': 80, 'sex': 0, 'cp': 0, 'trestbps': 115, 'chol': 203, 'fbs': 0, 'restecg': 1, 'thalach': 185, 'exang': 0, 'oldpeak': 0.1, 'slope': 2, 'ca': 0, 'thal': 0}].\nReshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.", 'message': 'Failed to classify heart diseases'}
How can I adjust the input data for this form of prediction, and add other outputs like predict_proba so that I can store them in a separate output dataset?
I know this error is related either to the "run" part of the score.py file or to the last code cell that calls the web service, but I'm unable to pin it down.
Would really appreciate some help.

I believe I managed to solve the problem - even though I encountered some serious issues. :)
As described here, I edited the score.py script:
import joblib
from azureml.core.model import Model
import numpy as np
import json
import pandas as pd
import numpy as np
from inference_schema.schema_decorators import input_schema, output_schema
from inference_schema.parameter_types.numpy_parameter_type import NumpyParameterType
from inference_schema.parameter_types.pandas_parameter_type import PandasParameterType
from inference_schema.parameter_types.standard_py_parameter_type import StandardPythonParameterType
data_sample = PandasParameterType(pd.DataFrame({'age': pd.Series([0], dtype='int64'),
                                                'sex': pd.Series(['example_value'], dtype='object'),
                                                'cp': pd.Series(['example_value'], dtype='object'),
                                                'trestbps': pd.Series([0], dtype='int64'),
                                                'chol': pd.Series([0], dtype='int64'),
                                                'fbs': pd.Series(['example_value'], dtype='object'),
                                                'restecg': pd.Series(['example_value'], dtype='object'),
                                                'thalach': pd.Series([0], dtype='int64'),
                                                'exang': pd.Series(['example_value'], dtype='object'),
                                                'oldpeak': pd.Series([0.0], dtype='float64'),
                                                'slope': pd.Series(['example_value'], dtype='object'),
                                                'ca': pd.Series(['example_value'], dtype='object'),
                                                'thal': pd.Series(['example_value'], dtype='object')}))
input_sample = StandardPythonParameterType({'data': data_sample})
result_sample = NumpyParameterType(np.array([0]))
output_sample = StandardPythonParameterType({'Results':result_sample})
def init():
    global model
    # Example when the model is a file
    model_path = Model.get_model_path('log')  # logistic
    print('Model Path is ', model_path)
    model = joblib.load(model_path)

@input_schema('Inputs', input_sample)
@output_schema(output_sample)
def run(Inputs):
    try:
        data = Inputs['data']
        result = model.predict_proba(data)
        return result.tolist()
    except Exception as e:
        error = str(e)
        return error
In the deployment step I adjusted the CondaDependencies:
# to install required packages
env = Environment('env')
cd = CondaDependencies.create(pip_packages=['pandas==1.1.5', 'azureml-defaults','joblib==0.17.0', 'inference-schema==1.3.0'], conda_packages = ['scikit-learn==0.22.2.post1'])
env.python.conda_dependencies = cd
# Register environment to re-use later
env.register(workspace = ws)
print('Registered Environment')
because:
a) it is necessary to include inference-schema in the dependencies file, and
b) I downgraded scikit-learn to scikit-learn==0.22.2.post1 because of this issue.
Now, when I feed the model with new data:
new_data = {
    "Inputs": {
        "data": [
            {
                "age": 71,
                "sex": "0",
                "cp": "0",
                "trestbps": 112,
                "chol": 203,
                "fbs": "0",
                "restecg": "1",
                "thalach": 185,
                "exang": "0",
                "oldpeak": 0.1,
                "slope": "2",
                "ca": "0",
                "thal": "2"
            }
        ]
    }
}
And use it for prediction:
import json
import requests
data = new_data
headers = {'Content-Type':'application/json'}
r = requests.post(url, str.encode(json.dumps(data)), headers = headers)
print(r.status_code)
print(r.json())
I get:
200 [[0.02325369841858338, 0.9767463015814166]]
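Since the original question also asked about storing other outputs such as predict_proba, here is a minimal sketch (same model and input schema assumed, not tested against the deployed service) of a run() that returns both the class labels and the probabilities in one JSON-serializable dict:
def run(Inputs):
    try:
        data = Inputs['data']
        predictions = model.predict(data)           # class labels
        probabilities = model.predict_proba(data)   # class probabilities
        return {'predictions': predictions.tolist(),
                'probabilities': probabilities.tolist()}
    except Exception as e:
        return str(e)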
Uff! Maybe someone will benefit from my painful learning path! :)

The main issue is with the conversion of the categorical variables. The traditional method of handling categorical variables is one-hot encoding; in the question they are only cast to object:
# changing format of the categorical variables
df[cat_cols] = df[cat_cols].apply(lambda x: x.astype('object'))
The transformation needs to be applied as shown below:
from sklearn.preprocessing import MinMaxScaler
cat_col = ['sex',
           'cp',
           'fbs',
           'restecg',
           'exang',
           'slope',
           'ca',
           'thal']
df_2 = pd.get_dummies(df[cat_col], drop_first=True)
After applying the dummies, 0/1 indicator columns are produced; then, for the new data:
new_data = pd.DataFrame([[71, 0, 0, 112, 203, 0, 1, 185, 0, 0.1, 2, 0, 2],
                         [80, 0, 0, 115, 203, 0, 1, 185, 0, 0.1, 2, 0, 0]],
                        columns=['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalach', 'exang', 'oldpeak', 'slope', 'ca', 'thal'])
This can be applied with only small changes to the syntax; a rough sketch follows below.
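As a sketch (column lists as in the question; note that get_dummies only encodes non-numeric columns, hence the astype('object')), the dummies built on the new data can be aligned with the training columns so the model sees the same feature layout:
# build dummies on the training data and remember the resulting columns
train_dummies = pd.get_dummies(df[cat_col].astype('object'), drop_first=True)
train_columns = train_dummies.columns

# apply the same transformation to the new data and align the columns,
# filling categories that do not occur in the new data with 0
new_dummies = pd.get_dummies(new_data[cat_col].astype('object'), drop_first=True)
new_dummies = new_dummies.reindex(columns=train_columns, fill_value=0)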
Edit:
new_data = {
    "Inputs": {
        "data": [
            {
                "age": 71,
                "sex": "0",
                "cp": "0",
                "trestbps": 112,
                "chol": 203,
                "fbs": "0",
                "restecg": "1",
                "thalach": 185,
                "exang": "0",
                "oldpeak": 0.1,
                "slope": "2",
                "ca": "0",
                "thal": "2"
            }
        ]
    }
}

Related

Python Particle Filter: Time Series in NFOURSID Input Error

Documentation:
https://nfoursid.readthedocs.io/en/latest/
#housekeeping
#_________________________________________________________________________
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from nfoursid.kalman import Kalman
from nfoursid.nfoursid import NFourSID
from nfoursid.state_space import StateSpace
import time
import datetime
import math
import scipy as sp
from pandas_datareader import data as pdr
from IPython.display import display, Latex
from statsmodels.graphics.tsaplots import plot_acf
import yfinance as yfin
#this Time Series should be used as input
#_________________________________________________________________________
import yfinance as yfin
yfin.pdr_override()
spy = pdr.get_data_yahoo('AAPL',start='2022-08-23',end='2022-10-24')
spy['Log Return'] = np.log(spy['Adj Close']/spy['Adj Close'].shift(1))
AAPL=pd.DataFrame((spy['Log Return']))
#this is from the documentation and actually works
#_________________________________________________________________________
pd.set_option('display.max_columns', None)
# reproducable results
np.random.seed(0)
# create a training-set by simulating a state-space model with this many datapoints
NUM_TRAINING_DATAPOINTS = 1000
# same for the test-set
NUM_TEST_DATAPOINTS = 20
INPUT_DIM = 3
OUTPUT_DIM = 2
# actual order of the state-space model in the training- and test-set
INTERNAL_STATE_DIM = 4
NOISE_AMPLITUDE = .1 # add noise to the training- and test-set
FIGSIZE = 8
# define system matrices for the state-space model of the training-
# and test-set
A = np.array([
    [1, .01, 0, 0],
    [0, 1, .01, 0],
    [0, 0, 1, .02],
    [0, -.01, 0, 1],
]) / 1.01
B = np.array([
    [1, 0, 0],
    [0, 1, 0],
    [0, 0, 1],
    [0, 1, 1],
]) / 3
C = np.array([
    [1, 0, 1, 1],
    [0, 0, 1, -1],
])
D = np.array([
    [1, 0, 1],
    [0, 1, 0]
]) / 10
state_space = StateSpace(A, B, C, D)
for _ in range(NUM_TRAINING_DATAPOINTS):
    input_state = np.random.standard_normal((INPUT_DIM, 1))
    noise = np.random.standard_normal((OUTPUT_DIM, 1)) * NOISE_AMPLITUDE
    state_space.step(input_state, noise)
nfoursid = NFourSID(
    # the state-space model can summarize inputs and outputs as a dataframe
    state_space.to_dataframe(),
    output_columns=state_space.y_column_names,
    input_columns=state_space.u_column_names,
    num_block_rows=10
)
nfoursid.subspace_identification()
#further methods
#_________________________________________________________________________
fig, ax = plt.subplots(figsize=(FIGSIZE, FIGSIZE))
nfoursid.plot_eigenvalues(ax)
fig.tight_layout()
# interpret the model order from the plot (the jump in the eigenvalues), still run order->inf
ORDER_OF_MODEL_TO_FIT = 4
state_space_identified, covariance_matrix = nfoursid.system_identification(
    rank=ORDER_OF_MODEL_TO_FIT
)
# output of the model predictions
nfoursid.to_dataframe()
# prediction vs. observation
figsize = (1.3 * FIGSIZE, FIGSIZE)
fig = plt.figure(figsize=figsize)
# the state-space model can plot its inputs and outputs
state_space.plot_input_output(fig)
fig.tight_layout()
Passing AAPL to NFourSID gives:
TypeError: NFourSID.__init__() missing 1 required positional argument: 'dataframe'
Passing AAPL to state_space gives:
ValueError: Dimensions of u (43, 1) are inconsistent. Expected (3, 1). and TypeError: 'DataFrame' object is not callable
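For reference, a minimal sketch of how the AAPL series might be handed to NFourSID, treating the log return as a single output with no exogenous inputs. The column name and the assumption that input_columns can be omitted for an output-only model are mine, not verified against this exact nfoursid version:
# drop the NaN from the first log return and give the column a plain name
aapl_outputs = AAPL.dropna().rename(columns={'Log Return': 'y'})

nfoursid_aapl = NFourSID(
    aapl_outputs,           # the measurement DataFrame (the missing positional argument)
    output_columns=['y'],   # columns treated as outputs of the system
    num_block_rows=10
)
nfoursid_aapl.subspace_identification()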

Error when running the model with custom square activation function

An error occurs when trying to execute a custom activation function; all the commands work until the last one, which raises an error.
TensorFlow version: 2.9.1
Keras version: 2.9.0
Thanks in advance.
The code
import tensorflow
from tensorflow.keras.datasets import mnist
from tensorflow.keras import backend as K
from keras.utils.generic_utils import get_custom_objects
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten, Activation
from tensorflow.keras.layers import Conv2D, MaxPooling2D
import numpy as np
import matplotlib.pyplot as plt
# Custom activation function
def custom_activation(x):
    return K.cast(K.x**2)  # I also tried the Square(x)
# Before creating the model, I update Keras' custom objects:
get_custom_objects().update({'custom_activation': Activation(custom_activation)})
# Model configuration
img_width, img_height = 28, 28
batch_size = 32
no_epochs = 5
no_classes = 10
verbosity = 1
# Load MNIST dataset
(input_train, target_train), (input_test, target_test) = mnist.load_data()
# Reshape data
input_train = input_train.reshape(input_train.shape[0], img_width, img_height, 1)
input_test = input_test.reshape(input_test.shape[0], img_width, img_height, 1)
input_shape = (img_width, img_height, 1)
# Parse numbers as floats
input_train = input_train.astype('float32')
input_test = input_test.astype('float32')
# Normalize data: [0, 1].
input_train = input_train / 255
input_test = input_test / 255
# Convert target vectors to categorical targets
target_train = tensorflow.keras.utils.to_categorical(target_train, no_classes)
target_test = tensorflow.keras.utils.to_categorical(target_test, no_classes)
# Create the model
model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3), activation=custom_activation, input_shape=((input_shape))))
The Error
When trying to execute the following line:
model.add(Conv2D(32, kernel_size=(3, 3), activation=custom_activation, input_shape=((input_shape))))
This error appears:
AttributeError: Exception encountered when calling layer "conv2d_4" (type Conv2D).
module 'keras.api._v2.keras.backend' has no attribute 'x'
Call arguments received by layer "conv2d_4" (type Conv2D):
• inputs=tf.Tensor(shape=(None, 28, 28, 1), dtype=float32)
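As a hedged side note on the error itself: the backend module has no attribute x, which is why K.x fails; the element-wise square is available as K.square (or tensorflow.math.square). A minimal sketch of the activation under that assumption:
def custom_activation(x):
    # element-wise square of the input tensor
    return K.square(x)

get_custom_objects().update({'custom_activation': Activation(custom_activation)})

model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3), activation=custom_activation,
                 input_shape=input_shape))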

Trying to parse access.log

Good afternoon. I'm trying to find the top 10 IPs in access.log (the standard Apache server log).
I have code like this:
import argparse
import json
import re
from collections import defaultdict, Counter
parser = argparse.ArgumentParser(description='parser script')
parser.add_argument('-f', dest='logfile', action='store', default='access.log')
args = parser.parse_args()
regul_ip = (r"^(?P<ips>.*?)")
regul_method = (r"\"(?P<request_method>GET|POST|PUT|DELETE|HEAD)")
def req_by_method():
    dict_ip = defaultdict(lambda: {"GET": 0, "POST": 0, "PUT": 0, "DELETE": 0, "HEAD": 0})
    with open(args.logfile) as file:
        for index, line in enumerate(file.readlines()):
            try:
                ip = re.search(regul_ip, line).group()
                method = re.search(regul_method, line).groups()[0]
                return Counter(dict_ip).most_common(10)
            except AttributeError:
                pass
            dict_ip[ip][method] += 1
    print(json.dumps(dict_ip, indent=4))
    with open("final_log.json", "w") as jsonfile:
        json.dump(dict_ip, jsonfile, indent=5)
When the code is executed, I only get: []
How can I fix this code to make it work?
I also need to output to the final JSON file a set of fields for each entry: "ip", "method", "status code", "url", and the duration of the request.
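As a rough sketch of one way to restructure the counting (assumptions: the IP pattern is tightened to ^(?P<ips>\S+), since the non-greedy .*? in the question matches an empty string, and the top 10 is computed only after the whole file has been read):
import re
from collections import Counter

regul_ip = r"^(?P<ips>\S+)"

def top_ips(logfile, n=10):
    ip_counter = Counter()
    with open(logfile) as file:
        for line in file:
            ip_match = re.search(regul_ip, line)
            if not ip_match:
                continue
            ip_counter[ip_match.group('ips')] += 1
    # only return after the whole file has been processed
    return ip_counter.most_common(n)

print(top_ips(args.logfile))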

FastAPI gunicorn uvicorn access_log format customization

We are using the https://github.com/tiangolo/uvicorn-gunicorn-fastapi-docker FastAPI image and were able to customize our logging with a gunicorn logging file.
However, we are not able to change the details of the %(message)s attribute as defined in the access-log documentation - https://docs.gunicorn.org/en/stable/settings.html#accesslog.
We receive the error posted below, saying that the keys are unknown.
A similar question has been asked before and received many upvotes:
gunicorn log-config access_log_format
What are we doing wrong?
#start.sh
# Start Gunicorn
exec gunicorn -k uvicorn.workers.UvicornWorker -c "$GUNICORN_CONF" "$APP_MODULE" --log-config "/logging.conf"
[loggers]
keys=root, gunicorn.error, gunicorn.access,uvicorn.error,uvicorn.access
[handlers]
keys=console, error_file, access_file, access_filegunicorn
[formatters]
keys=generic, access, accessgunicorn
[logger_root]
level=INFO
handlers=console
propagate=1
[logger_gunicorn.error]
level=INFO
handlers=error_file
propagate=0
qualname=gunicorn.error
[logger_gunicorn.access]
level=INFO
handlers=access_filegunicorn
propagate=0
qualname=gunicorn.access
[logger_uvicorn.error]
level=INFO
handlers=error_file
propagate=0
qualname=uvicorn.error
[logger_uvicorn.access]
level=INFO
handlers=access_file
propagate=0
qualname=uvicorn.access
[handler_console]
class=StreamHandler
formatter=generic
args=(sys.stdout, )
[handler_error_file]
class=StreamHandler
formatter=generic
args=(sys.stdout, )
[handler_access_file]
class=StreamHandler
formatter=access
args=(sys.stdout, )
[handler_access_filegunicorn]
class=StreamHandler
formatter=accessgunicorn
args=(sys.stdout, )
[formatter_generic]
format=[%(levelname)s]: %(message)s
datefmt=%Y-%m-%dT%H:%M:%S
class=logging.Formatter
[formatter_access]
format=[%(levelname)s]: %(message)s
datefmt=%Y-%m-%dT%H:%M:%S
class=logging.Formatter
[formatter_accessgunicorn]
format=[%(levelname)s]: '{"remote_ip":"%(h)s","session_id":"%({X-Session-Id}i)s","status":"%(s)s","request_method":"%(m)s","request_path":"%(U)s","request_querystring":"%(q)s","request_timetaken":"%(D)s","response_length":"%(B)s", "remote_addr": "%(h)s"}'
datefmt=%Y-%m-%dT%H:%M:%S
class=logging.Formatter
Message: '%s - "%s %s HTTP/%s" %d'
Arguments: ('213.3.14.24:53374', 'GET', '/v1/docs', '1.1', 200)
--- Logging error ---
Traceback (most recent call last):
File "/usr/local/lib/python3.7/logging/__init__.py", line 1025, in emit
msg = self.format(record)
File "/usr/local/lib/python3.7/logging/__init__.py", line 869, in format
return fmt.format(record)
File "/usr/local/lib/python3.7/logging/__init__.py", line 611, in format
s = self.formatMessage(record)
File "/usr/local/lib/python3.7/logging/__init__.py", line 580, in formatMessage
return self._style.format(record)
File "/usr/local/lib/python3.7/logging/__init__.py", line 422, in format
return self._fmt % record.__dict__
KeyError: 'h'
Call stack:
File "/usr/local/bin/gunicorn", line 8, in <module>
sys.exit(run())
File "/usr/local/lib/python3.7/site-packages/gunicorn/app/wsgiapp.py", line 58, in run
WSGIApplication("%(prog)s [OPTIONS] [APP_MODULE]").run()
File "/usr/local/lib/python3.7/site-packages/gunicorn/app/base.py", line 228, in run
super().run()
File "/usr/local/lib/python3.7/site-packages/gunicorn/app/base.py", line 72, in run
Arbiter(self).run()
File "/usr/local/lib/python3.7/site-packages/gunicorn/arbiter.py", line 202, in run
self.manage_workers()
File "/usr/local/lib/python3.7/site-packages/gunicorn/arbiter.py", line 545, in manage_workers
self.spawn_workers()
File "/usr/local/lib/python3.7/site-packages/gunicorn/arbiter.py", line 616, in spawn_workers
self.spawn_worker()
File "/usr/local/lib/python3.7/site-packages/gunicorn/arbiter.py", line 583, in spawn_worker
worker.init_process()
File "/usr/local/lib/python3.7/site-packages/uvicorn/workers.py", line 61, in init_process
super(UvicornWorker, self).init_process()
File "/usr/local/lib/python3.7/site-packages/gunicorn/workers/base.py", line 140, in init_process
self.run()
File "/usr/local/lib/python3.7/site-packages/uvicorn/workers.py", line 70, in run
loop.run_until_complete(server.serve(sockets=self.sockets))
File "/usr/local/lib/python3.7/site-packages/uvicorn/protocols/http/httptools_impl.py", line 385, in run_asgi
result = await app(self.scope, self.receive, self.send)
File "/usr/local/lib/python3.7/site-packages/uvicorn/middleware/proxy_headers.py", line 45, in __call__
return await self.app(scope, receive, send)
File "/usr/local/lib/python3.7/site-packages/fastapi/applications.py", line 171, in __call__
await super().__call__(scope, receive, send)
File "/usr/local/lib/python3.7/site-packages/starlette/applications.py", line 102, in __call__
await self.middleware_stack(scope, receive, send)
File "/usr/local/lib/python3.7/site-packages/starlette/middleware/errors.py", line 159, in __call__
await self.app(scope, receive, _send)
File "/usr/local/lib/python3.7/site-packages/starlette/middleware/cors.py", line 78, in __call__
await self.app(scope, receive, send)
File "/usr/local/lib/python3.7/site-packages/starlette/exceptions.py", line 71, in __call__
await self.app(scope, receive, sender)
File "/usr/local/lib/python3.7/site-packages/starlette/routing.py", line 550, in __call__
await route.handle(scope, receive, send)
I found very useful information here https://github.com/tiangolo/fastapi/issues/1508
I needed to add the request datetime, and the solution that I implemented was:
@app.on_event("startup")
async def startup_event():
    logger = logging.getLogger("uvicorn.access")
    console_formatter = uvicorn.logging.ColourizedFormatter(
        "{asctime} {levelprefix} : {message}",
        style="{", use_colors=True)
    logger.handlers[0].setFormatter(console_formatter)

if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8000)
Our solution was a custom logger written in Python that is referenced in a logging.conf file.
logging.conf
[loggers]
keys=root, gunicorn.error, gunicorn.access,uvicorn.error,uvicorn.access
[handlers]
keys=console, error_file, access_file, accesscustom
[formatters]
keys=generic, access, AccessFormatter
[logger_root]
level=INFO
handlers=console
propagate=1
[logger_gunicorn.error]
level=INFO
handlers=error_file
propagate=0
qualname=gunicorn.error
[logger_gunicorn.access]
level=INFO
handlers=accesscustom
propagate=0
qualname=gunicorn.access
[logger_uvicorn.error]
level=INFO
handlers=error_file
propagate=0
qualname=uvicorn.error
[logger_uvicorn.access]
level=INFO
handlers=accesscustom
propagate=0
qualname=uvicorn.access
[handler_console]
class=StreamHandler
formatter=generic
args=(sys.stdout, )
[handler_error_file]
class=StreamHandler
formatter=generic
args=(sys.stdout, )
[handler_access_file]
class=StreamHandler
formatter=access
args=(sys.stdout, )
[handler_accesscustom]
class=StreamHandler
formatter=AccessFormatter
args=(sys.stdout, )
[formatter_generic]
format=%(levelname)s: %(message)s
datefmt=%Y-%m-%dT%H:%M:%S
class=uvicorn.logging.DefaultFormatter
[formatter_access]
format=%(levelname)s: %(message)s
datefmt=%Y-%m-%dT%H:%M:%S
class=customlogger.CustomFormatter
[formatter_AccessFormatter]
format={"event":"access_log","ip":"%(h)s","status":"%(s)s","method":"%(m)s","path":"%(U)s","referer":"%(f)s","x_session_id":"%(x-session-id)s","x_google_id":"%(x-google-id)s","x_server_time":"%(x-server-time)s","agent":"%(a)s"}
datefmt=%Y-%m-%dT%H:%M:%S
class=customlogger.CustomFormatter
customlogger.py
import base64
import binascii
import http
import logging
import os
import sys
import time
from copy import copy
from datetime import datetime
from pprint import pprint

import click

TRACE_LOG_LEVEL = 5


class ColourizedFormatter(logging.Formatter):
    """
    A custom log formatter class that:
    * Outputs the LOG_LEVEL with an appropriate color.
    * If a log call includes an `extras={"color_message": ...}` it will be used
      for formatting the output, instead of the plain text message.
    """

    level_name_colors = {
        TRACE_LOG_LEVEL: lambda level_name: click.style(str(level_name), fg="blue"),
        logging.DEBUG: lambda level_name: click.style(str(level_name), fg="cyan"),
        logging.INFO: lambda level_name: click.style(str(level_name), fg="green"),
        logging.WARNING: lambda level_name: click.style(str(level_name), fg="yellow"),
        logging.ERROR: lambda level_name: click.style(str(level_name), fg="red"),
        logging.CRITICAL: lambda level_name: click.style(
            str(level_name), fg="bright_red"
        ),
    }

    def __init__(self, fmt=None, datefmt=None, style="%", use_colors=None):
        if use_colors in (True, False):
            self.use_colors = use_colors
        else:
            self.use_colors = sys.stdout.isatty()
        super().__init__(fmt=fmt, datefmt=datefmt, style=style)

    def color_level_name(self, level_name, level_no):
        default = lambda level_name: str(level_name)
        func = self.level_name_colors.get(level_no, default)
        return func(level_name)

    def should_use_colors(self):
        return True

    def formatMessage(self, record):
        recordcopy = copy(record)
        levelname = recordcopy.levelname
        seperator = " " * (8 - len(recordcopy.levelname))
        if self.use_colors:
            levelname = self.color_level_name(levelname, recordcopy.levelno)
            if "color_message" in recordcopy.__dict__:
                recordcopy.msg = recordcopy.__dict__["color_message"]
                recordcopy.__dict__["message"] = recordcopy.getMessage()
        recordcopy.__dict__["levelprefix"] = levelname + ":" + seperator
        return super().formatMessage(recordcopy)


class DefaultFormatter(ColourizedFormatter):
    def should_use_colors(self):
        return sys.stderr.isatty()


class AccessFormatter(ColourizedFormatter):
    status_code_colours = {
        1: lambda code: click.style(str(code), fg="bright_white"),
        2: lambda code: click.style(str(code), fg="green"),
        3: lambda code: click.style(str(code), fg="yellow"),
        4: lambda code: click.style(str(code), fg="red"),
        5: lambda code: click.style(str(code), fg="bright_red"),
    }

    def get_client_addr(self, scope):
        client = scope.get("client")
        if not client:
            return ""
        return "%s:%d" % (client[0], client[1])

    def get_path(self, scope):
        return scope.get("root_path", "") + scope["path"]

    def get_full_path(self, scope):
        path = scope.get("root_path", "") + scope["path"]
        query_string = scope.get("query_string", b"").decode("ascii")
        if query_string:
            return path + "?" + query_string
        return path

    def get_status_code(self, record):
        status_code = record.__dict__["status_code"]
        try:
            status_phrase = http.HTTPStatus(status_code).phrase
        except ValueError:
            status_phrase = ""
        status_and_phrase = "%s %s" % (status_code, status_phrase)
        if self.use_colors:
            default = lambda code: status_and_phrase
            func = self.status_code_colours.get(status_code // 100, default)
            return func(status_and_phrase)
        return status_and_phrase

    def formatMessage(self, record):
        recordcopy = copy(record)
        scope = recordcopy.__dict__["scope"]
        method = scope["method"]
        path = self.get_path(scope)
        full_path = self.get_full_path(scope)
        client_addr = self.get_client_addr(scope)
        status_code = self.get_status_code(recordcopy)
        http_version = scope["http_version"]
        request_line = "%s %s HTTP/%s" % (method, full_path, http_version)
        if self.use_colors:
            request_line = click.style(request_line, bold=True)
        recordcopy.__dict__.update(
            {
                "method": method,
                "path": path,
                "full_path": full_path,
                "client_addr": client_addr,
                "request_line": request_line,
                "status_code": status_code,
                "http_version": http_version,
            }
        )
        return super().formatMessage(recordcopy)


class SafeAtoms(dict):
    def __init__(self, atoms):
        dict.__init__(self)
        for key, value in atoms.items():
            if isinstance(value, str):
                self[key] = value.replace('"', '\\"')
            else:
                self[key] = value

    def __getitem__(self, k):
        if k.startswith("{"):
            kl = k.lower()
            if kl in self:
                return super().__getitem__(kl)
            else:
                return "-"
        if k in self:
            return super().__getitem__(k)
        else:
            return '-'


class CustomFormatter(AccessFormatter):
    atoms_wrapper_class = SafeAtoms

    def now(self):
        """ return date in Apache Common Log Format """
        return time.strftime('[%d/%b/%Y:%H:%M:%S %z]')

    def _get_user(self, environ):
        user = None
        http_auth = environ.get("HTTP_AUTHORIZATION")
        if http_auth and http_auth.lower().startswith('basic'):
            auth = http_auth.split(" ", 1)
            if len(auth) == 2:
                try:
                    # b64decode doesn't accept unicode in Python < 3.3
                    # so we need to convert it to a byte string
                    auth = base64.b64decode(auth[1].strip().encode('utf-8'))
                    # b64decode returns a byte string
                    auth = auth.decode('utf-8')
                    auth = auth.split(":", 1)
                except (TypeError, binascii.Error, UnicodeDecodeError) as exc:
                    self.debug("Couldn't get username: %s", exc)
                    return user
                if len(auth) == 2:
                    user = auth[0]
        return user

    def atoms(self, environ, request_time, scope, statuscode, created):
        headers = dict(scope.get('headers', [('-', '-')]))
        response_headers = dict(scope.get('response_headers', [('-', '-')]))
        atoms = {
            'h': scope.get("client", ('-', ''))[0],
            'l': '-',
            's': statuscode,
            'u': self._get_user(environ) or '-',
            't': created,
            'm': str(scope.get("method", "-")),
            'U': scope.get("path", "-"),
            'q': scope.get("query_string", "-").decode("utf-8"),
            'H': str(scope.get("type", "-")),
            'f': headers.get(b"referer", b"-").decode("utf-8"),
            'a': headers.get(b"user-agent", b"-").decode("utf-8"),
            'x-session-id': headers.get(b"x-session-id", b"-").decode("utf-8"),
            'x-google-id': headers.get(b"x-google-id", b"-").decode("utf-8"),
            'x-server-time': response_headers.get(b"x-server-time", b"").decode("utf-8"),
            'p': "<%s>" % os.getpid()
        }
        return atoms

    def formatMessage(self, record):
        recordcopy = copy(record)
        scope = recordcopy.__dict__["scope"]
        # pprint(vars(recordcopy))
        safe_atoms = self.atoms_wrapper_class(
            self.atoms(os.environ, datetime.now(), scope, recordcopy.status_code, recordcopy.created)
        )
        recordcopy.__dict__.update(safe_atoms)
        # pprint(vars(os.environ))
        return super().formatMessage(recordcopy)
I'm also using the FastAPI - Uvicorn - Gunicorn stack.
To modify the format of the uvicorn logging, I first inspected its current configuration:
>>> from pprint import pprint
>>> import uvicorn.config
>>> pprint(uvicorn.config.LOGGING_CONFIG)
{'disable_existing_loggers': False,
'formatters': {'access': {'()': 'uvicorn.logging.AccessFormatter',
'fmt': '%(levelprefix)s %(client_addr)s - '
'"%(request_line)s" %(status_code)s'},
'default': {'()': 'uvicorn.logging.DefaultFormatter',
'fmt': '%(levelprefix)s %(message)s',
'use_colors': None}},
'handlers': {'access': {'class': 'logging.StreamHandler',
'formatter': 'access',
'stream': 'ext://sys.stdout'},
'default': {'class': 'logging.StreamHandler',
'formatter': 'default',
'stream': 'ext://sys.stderr'}},
'loggers': {'uvicorn': {'handlers': ['default'], 'level': 'INFO'},
'uvicorn.access': {'handlers': ['access'],
'level': 'INFO',
'propagate': False},
'uvicorn.error': {'level': 'INFO'}},
'version': 1}
And I created my own logging configuration based on the default configuration of uvicorn. I added the date/time of the log, and my own custom logger:
import logging
LOGGER_NAME = "myapp"
log_config = {
    "version": 1,
    "disable_existing_loggers": False,
    "formatters": {
        'access': {
            '()': 'uvicorn.logging.AccessFormatter',
            'fmt': '%(levelprefix)s %(asctime)s - %(client_addr)s - "%(request_line)s" %(status_code)s',
            "datefmt": "%Y-%m-%d %H:%M:%S",
            "use_colors": True
        },
        "default": {
            "()": "uvicorn.logging.DefaultFormatter",
            "fmt": "%(levelprefix)s %(asctime)s - %(message)s",
            "datefmt": "%Y-%m-%d %H:%M:%S",
            "use_colors": True
        },
    },
    "handlers": {
        'access': {
            'class': 'logging.StreamHandler',
            'formatter': 'access',
            'stream': 'ext://sys.stdout'
        },
        "default": {
            "formatter": "default",
            "class": "logging.StreamHandler",
            "stream": "ext://sys.stderr",
        },
    },
    "loggers": {
        LOGGER_NAME: {
            "handlers": ["default"],
            "level": "DEBUG",
            "propagate": False
        },
        "uvicorn": {
            "handlers": ["default"],
            "level": "DEBUG",
            "propagate": True
        },
        'uvicorn.access': {
            'handlers': ['access'],
            'level': 'INFO',
            'propagate': False
        },
        'uvicorn.error': {
            'level': 'INFO',
            'propagate': False
        }
    },
}
def get_logger():
    return logging.getLogger(LOGGER_NAME)
Then in my main.py file, where I defined app = FastAPI(...), I configure the logging just after my import lines with:
logging.config.dictConfig(log_config)
And I do custom logging in my app by using the custom logger that I defined:
logger = get_logger()
logger.info("Hello World!")
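For completeness, a minimal sketch of how this can be wired together in main.py (module layout and the example route are assumptions); the same dict can also be handed to uvicorn directly via its log_config parameter:
import logging.config

import uvicorn
from fastapi import FastAPI

logging.config.dictConfig(log_config)  # log_config and get_logger() as defined above

app = FastAPI()
logger = get_logger()

@app.get("/")
async def root():
    logger.info("Hello World!")
    return {"status": "ok"}

if __name__ == "__main__":
    # uvicorn accepts the same logging dict directly
    uvicorn.run(app, host="0.0.0.0", port=8000, log_config=log_config)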

Help with parsing a JSON file

I need some help here parsing some JSON data.
My JSON file contains this:
{
    "message": "{\"gender\":\"MADAME\",\"Polo\":\"POTA\",\"last_name\":\"pato\",\"email\":\"pato.pota#mailler.com\",\"subject\":\"toto claim\",\"sub_subject\":\"Claim insurance car\",\"question\":\"claim for red car\",\"store\":\"claiming for incident\"}",
    "context": [],
    "level": 200,
    "level_name": "INFO",
    "channel": "mailer",
    "datetime": {
        "date": "2016-09-19 11:00:26.795353",
        "timezone_type": 3
    },
    "extra": []
}
Python Code.
import os
import json
def Get_running_dir():
    path = os.getcwd()
    file = path + "\json_data.txt"
    print(file)
    with open(file, 'r') as f:
        data = f.read()
        data_json = json.loads(data)
        print(data_json)
        print(type(data_json))

Get_running_dir()
The issue is that print(type(data_json)) shows it is a dict, right?
But once I call print(data_json['message']['gender']):
<class 'dict'>
Traceback (most recent call last):
File "Extract_log.py", line 29, in <module>
Get_running_dir()
File "Extract_log.py", line 25, in Get_running_dir
print(data_json['message']['gender'])
TypeError: string indices must be integers
I need some help to parse this file, please.
Thanks in advance.
Regards,
I figured out how to work with the JSON today.
import os
import json
def Get_running_dir():
    path = os.getcwd()
    file = path + "\json_data.txt"
    print(file)
    with open(file, 'r') as f:
        data = f.read()
        data_json = json.loads(data)
        # My error was here:
        print(data_json['message'])  # this is still a string
        msg = json.loads(data_json['message'])  # THIS CONVERTS THE STRING TO a dict
        # this way I can access its keys,
        # like this:
        print(msg['gender'], msg['first_name'], msg['last_name'])
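Put together, a short sketch of the double decode (file name as in the question): the message field is itself a JSON string, so it has to be decoded a second time.
import json

with open("json_data.txt", "r", encoding="utf-8") as f:
    outer = json.load(f)                # first decode: the log record
msg = json.loads(outer["message"])      # second decode: the embedded payload
print(msg["gender"], msg["last_name"], msg["email"])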