Making predictions with Azure Machine Learning on new data that contains headers (like a pd.DataFrame) - JSON
My question is somewhat related to https://learn.microsoft.com/en-us/answers/questions/217305/data-input-format-call-the-service-for-azure-ml-ti.html - however, the solution provided there does not seem to work.
I am building a simple model on the heart-disease dataset, wrapped in a Pipeline because I use some featurization steps (scaling, encoding, etc.). The full script is below:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
import joblib
import pickle
# data input
df = pd.read_csv('heart.csv')
# numerical variables
num_cols = ['age',
            'trestbps',
            'chol',
            'thalach',
            'oldpeak']
# categorical variables
cat_cols = ['sex',
            'cp',
            'fbs',
            'restecg',
            'exang',
            'slope',
            'ca',
            'thal']
# changing format of the categorical variables
df[cat_cols] = df[cat_cols].apply(lambda x: x.astype('object'))
# target variable
y = df['target']
# features
X = df.drop(['target'], axis=1)
# data split:
# random seed
np.random.seed(42)
# splitting the data
X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    test_size=0.2,
                                                    stratify=y)
# double check
X_train.shape, X_test.shape, y_train.shape, y_test.shape
# pipeline for numerical data
num_preprocessing = Pipeline([('num_imputer', SimpleImputer(strategy='mean')),  # imputing with mean
                              ('minmaxscaler', MinMaxScaler())])                # scaling
# pipeline for categorical data
cat_preprocessing = Pipeline([('cat_imputer', SimpleImputer(strategy='constant', fill_value='missing')),  # filling missing values
                              ('onehot', OneHotEncoder(drop='first', handle_unknown='error'))])           # One-Hot Encoding
# preprocessor - combining pipelines
preprocessor = ColumnTransformer([
    ('categorical', cat_preprocessing, cat_cols),
    ('numerical', num_preprocessing, num_cols)
])
# initial model parameters
log_ini_params = {'penalty': 'l2',
                  'tol': 0.0073559740277086005,
                  'C': 1.1592424247511928,
                  'fit_intercept': True,
                  'solver': 'liblinear'}
# model - Pipeline
log_clf = Pipeline([('preprocessor', preprocessor),
                    ('clf', LogisticRegression(**log_ini_params))])
log_clf.fit(X_train, y_train)
# dumping the model
f = 'model/log.pkl'
with open(f, 'wb') as file:
    pickle.dump(log_clf, file)
# loading it
loaded_model = joblib.load(f)
# double check on a single datapoint
new_data = pd.DataFrame({'age': 71,
                         'sex': 0,
                         'cp': 0,
                         'trestbps': 112,
                         'chol': 203,
                         'fbs': 0,
                         'restecg': 1,
                         'thalach': 185,
                         'exang': 0,
                         'oldpeak': 0.1,
                         'slope': 2,
                         'ca': 0,
                         'thal': 2}, index=[0])
loaded_model.predict(new_data)
...and it works just fine. I then deploy the model to an Azure web service with the following steps:
I create the score.py file:
import joblib
from azureml.core.model import Model
import json
def init():
    global model
    model_path = Model.get_model_path('log')  # logistic
    print('Model Path is ', model_path)
    model = joblib.load(model_path)

def run(data):
    try:
        data = json.loads(data)
        result = model.predict(data['data'])
        # any data type, as long as it is JSON serializable
        return {'data': result.tolist(), 'message': 'Successfully classified heart diseases'}
    except Exception as e:
        error = str(e)
        return {'data': error, 'message': 'Failed to classify heart diseases'}
I deploy the model:
from azureml.core import Workspace
from azureml.core.webservice import AciWebservice
from azureml.core.webservice import Webservice
from azureml.core.model import InferenceConfig
from azureml.core.environment import Environment
from azureml.core.model import Model
from azureml.core.conda_dependencies import CondaDependencies
ws = Workspace.from_config()
model = Model.register(workspace=ws,
                       model_path='model/log.pkl',
                       model_name='log',
                       tags={'version': '1'},
                       description='Heart disease classification')
# to install required packages
env = Environment('env')
cd = CondaDependencies.create(pip_packages=['pandas==1.1.5', 'azureml-defaults','joblib==0.17.0'], conda_packages = ['scikit-learn==0.23.2'])
env.python.conda_dependencies = cd
# Register environment to re-use later
env.register(workspace = ws)
print('Registered Environment')
myenv = Environment.get(workspace=ws, name='env')
myenv.save_to_directory('./environ', overwrite=True)
aciconfig = AciWebservice.deploy_configuration(
    cpu_cores=1,
    memory_gb=1,
    tags={'data': 'heart disease classifier'},
    description='Classification of heart diseases',
)
inference_config = InferenceConfig(entry_script='score.py', environment=myenv)
service = Model.deploy(workspace=ws,
                       name='hd-model-log',
                       models=[model],
                       inference_config=inference_config,
                       deployment_config=aciconfig,
                       overwrite=True)
service.wait_for_deployment(show_output=True)
url = service.scoring_uri
print(url)
The deployment is fine:
Succeeded
ACI service creation operation finished, operation "Succeeded"
But I cannot make any predictions with new data. I try to use:
import pandas as pd
new_data = pd.DataFrame([[71, 0, 0, 112, 203, 0, 1, 185, 0, 0.1, 2, 0, 2],
                         [80, 0, 0, 115, 203, 0, 1, 185, 0, 0.1, 2, 0, 0]],
                        columns=['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg',
                                 'thalach', 'exang', 'oldpeak', 'slope', 'ca', 'thal'])
Following the answer from this topic (https://learn.microsoft.com/en-us/answers/questions/217305/data-input-format-call-the-service-for-azure-ml-ti.html) I transform the data:
test_sample = json.dumps({'data': new_data.to_dict(orient='records')})
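For reference, to_dict(orient='records') turns the frame into a list with one dict per row, so the serialized payload looks roughly like this (values from the DataFrame above):
print(test_sample)
# {"data": [{"age": 71, "sex": 0, "cp": 0, "trestbps": 112, "chol": 203, "fbs": 0, "restecg": 1,
#            "thalach": 185, "exang": 0, "oldpeak": 0.1, "slope": 2, "ca": 0, "thal": 2},
#           {"age": 80, "sex": 0, "cp": 0, "trestbps": 115, "chol": 203, "fbs": 0, "restecg": 1,
#            "thalach": 185, "exang": 0, "oldpeak": 0.1, "slope": 2, "ca": 0, "thal": 0}]}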
And try to make some predictions:
import json
import requests
data = test_sample
headers = {'Content-Type':'application/json'}
r = requests.post(url, data=data, headers = headers)
print(r.status_code)
print(r.json())
However, I encounter an error:
200
{'data': "Expected 2D array, got 1D array instead:\narray=[{'age': 71, 'sex': 0, 'cp': 0, 'trestbps': 112, 'chol': 203, 'fbs': 0, 'restecg': 1, 'thalach': 185, 'exang': 0, 'oldpeak': 0.1, 'slope': 2, 'ca': 0, 'thal': > 2}\n {'age': 80, 'sex': 0, 'cp': 0, 'trestbps': 115, 'chol': 203, 'fbs': 0, 'restecg': 1, 'thalach': 185, 'exang': 0, 'oldpeak': 0.1, 'slope': 2, 'ca': 0, 'thal': 0}].\nReshape your data either using array.reshape(-1, > 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.", 'message': 'Failed to classify heart diseases'}
How can I adjust the input data for this form of prediction, and also add other outputs such as predict_proba, so that I can store them in a separate output dataset?
I know this error is related either to the run part of the score.py file or to the last code cell that calls the web service, but I'm unable to pinpoint it.
Would really appreciate some help.
I believe I managed to solve the problem - even though I encountered some serious issues. :)
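The root cause of the original error: json.loads turns the payload into a plain Python list of dicts, and model.predict then sees a 1-D object array; since the Pipeline's ColumnTransformer selects columns by name, the records have to be rebuilt into a DataFrame before predicting. A minimal fix along those lines would be (a sketch, not what I ended up deploying):
import json
import pandas as pd

def run(data):
    try:
        payload = json.loads(data)
        df = pd.DataFrame(payload['data'])  # list of records -> 2-D frame with named columns
        result = model.predict(df)          # model is loaded in init() as before
        return {'data': result.tolist(), 'message': 'Successfully classified heart diseases'}
    except Exception as e:
        return {'data': str(e), 'message': 'Failed to classify heart diseases'}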
As described here - I edited the score.py script:
import joblib
from azureml.core.model import Model
import json
import pandas as pd
import numpy as np
from inference_schema.schema_decorators import input_schema, output_schema
from inference_schema.parameter_types.numpy_parameter_type import NumpyParameterType
from inference_schema.parameter_types.pandas_parameter_type import PandasParameterType
from inference_schema.parameter_types.standard_py_parameter_type import StandardPythonParameterType
data_sample = PandasParameterType(pd.DataFrame({'age': pd.Series([0], dtype='int64'),
                                                'sex': pd.Series(['example_value'], dtype='object'),
                                                'cp': pd.Series(['example_value'], dtype='object'),
                                                'trestbps': pd.Series([0], dtype='int64'),
                                                'chol': pd.Series([0], dtype='int64'),
                                                'fbs': pd.Series(['example_value'], dtype='object'),
                                                'restecg': pd.Series(['example_value'], dtype='object'),
                                                'thalach': pd.Series([0], dtype='int64'),
                                                'exang': pd.Series(['example_value'], dtype='object'),
                                                'oldpeak': pd.Series([0.0], dtype='float64'),
                                                'slope': pd.Series(['example_value'], dtype='object'),
                                                'ca': pd.Series(['example_value'], dtype='object'),
                                                'thal': pd.Series(['example_value'], dtype='object')}))
input_sample = StandardPythonParameterType({'data': data_sample})
result_sample = NumpyParameterType(np.array([0]))
output_sample = StandardPythonParameterType({'Results':result_sample})
def init():
    global model
    # Example when the model is a file
    model_path = Model.get_model_path('log')  # logistic
    print('Model Path is ', model_path)
    model = joblib.load(model_path)

@input_schema('Inputs', input_sample)
@output_schema(output_sample)
def run(Inputs):
    try:
        data = Inputs['data']
        result = model.predict_proba(data)
        return result.tolist()
    except Exception as e:
        error = str(e)
        return error
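To sanity-check the scoring logic without a deployment round-trip, the pickled Pipeline can be exercised locally (a sketch; it loads model/log.pkl directly rather than via init(), since Model.get_model_path only resolves inside the service):
import joblib
import pandas as pd

model = joblib.load('model/log.pkl')  # the same artifact the service loads
records = [{'age': 71, 'sex': 0, 'cp': 0, 'trestbps': 112, 'chol': 203, 'fbs': 0,
            'restecg': 1, 'thalach': 185, 'exang': 0, 'oldpeak': 0.1, 'slope': 2,
            'ca': 0, 'thal': 2}]
print(model.predict_proba(pd.DataFrame(records)))  # mirrors the predict_proba call in run()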
In the deployment step I adjusted the CondaDependencies:
# to install required packages
env = Environment('env')
cd = CondaDependencies.create(pip_packages=['pandas==1.1.5', 'azureml-defaults','joblib==0.17.0', 'inference-schema==1.3.0'], conda_packages = ['scikit-learn==0.22.2.post1'])
env.python.conda_dependencies = cd
# Register environment to re-use later
env.register(workspace = ws)
print('Registered Environment')
because:
a) it is necessary to include inference-schema in the dependencies file, and
b) I downgraded scikit-learn to scikit-learn==0.22.2.post1 because of this issue.
Now, when I feed the model with new data:
new_data = {
    "Inputs": {
        "data": [
            {
                "age": 71,
                "sex": "0",
                "cp": "0",
                "trestbps": 112,
                "chol": 203,
                "fbs": "0",
                "restecg": "1",
                "thalach": 185,
                "exang": "0",
                "oldpeak": 0.1,
                "slope": "2",
                "ca": "0",
                "thal": "2"
            }
        ]
    }
}
And use it for prediction:
import json
import requests
data = new_data
headers = {'Content-Type':'application/json'}
r = requests.post(url, str.encode(json.dumps(data)), headers = headers)
print(r.status_code)
print(r.json())
I get:
200 [[0.02325369841858338, 0.9767463015814166]]
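Since the response is now a plain list of class probabilities, it can also be collected into a separate output dataset, which was the other part of my question (a sketch; the column names are my own labels for the two classes in classes_ order):
import pandas as pd

proba = pd.DataFrame(r.json(), columns=['proba_class_0', 'proba_class_1'])
proba.to_csv('predictions_proba.csv', index=False)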
Uff! Maybe someone will benefit from my painful learning path! :)
The main issue is with the conversion of the categorical variables. The traditional method of handling categorical variables is using OneHotEncoder:
# changing format of the categorical variables
df[cat_cols] = df[cat_cols].apply(lambda x: x.astype('object'))
The transformation needs to be applied to the data as shown below:
from sklearn.preprocessing import MinMaxScaler
cat_col = ['sex',
           'cp',
           'fbs',
           'restecg',
           'exang',
           'slope',
           'ca',
           'thal']
df_2 = pd.get_dummies(df[cat_col], drop_first=True)
Applying the dummies yields 0/1 indicator columns; then:
new_data = pd.DataFrame([[71, 0, 0, 112, 203, 0, 1, 185, 0, 0.1, 2, 0, 2],
                         [80, 0, 0, 115, 203, 0, 1, 185, 0, 0.1, 2, 0, 0]],
                        columns=['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg',
                                 'thalach', 'exang', 'oldpeak', 'slope', 'ca', 'thal'])
This can be applied with fewer changes in the syntax.
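One caveat with the get_dummies route: it only sees the categories present in the frame it is given, so dummies built from a one- or two-row payload will not match the training columns. A sketch of aligning them, reusing cat_col and df_2 from above (the numeric columns would then be concatenated back alongside):
import pandas as pd

new_dummies = pd.get_dummies(new_data[cat_col].astype('object'))
# align with the training-time dummy columns; categories absent from the new data become 0
new_dummies = new_dummies.reindex(columns=df_2.columns, fill_value=0)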