Attribute error when sending json with Requests from mediainfo AWS Lambda - json

I m trying to send video metadata to MongoDB Atlas from an s3 bucket with media info as an AWS Lambda in Python 3.7. It works when I hardcode the data, but I receive either an object key error or an attribute error. I tried importing json and using json.loads() and json.dumps() and receive errors. Here's the code,
import boto3
from botocore.client import Config
from botocore.exceptions import ClientError
import json
import xmltodict
import subprocess
import os
import uuid
import requests
import json
# Generate a presigned URL for S3 Object
def get_presigned_url(expiration, objBucket, objKey):
s3 = boto3.client('s3', 'us-east-1', config=Config(s3={'addressing_style': 'virtual'}))
Params={
'Bucket': objBucket,
'Key': objKey
}
print ('CORE:: Media Evaluator:: Bucket Name: ' + objBucket)
print ('CORE:: Media Evaluator:: Object Key: ' + objKey)
try:
response = s3.generate_presigned_url('get_object', Params, ExpiresIn=expiration)
print('this is the response !!!!!!!!!', response)
except ClientError as e:
print (e)
raise
return None
return response
# Get MD5 from S3 Object Tag
def get_md5_from_objectTag(objBucket, objKey):
s3 = boto3.client('s3', 'us-east-1', config=Config(s3={'addressing_style': 'virtual'}))
response = s3.get_object_tagging(
Bucket=objBucket,
Key=objKey
)
for tag in response['TagSet']:
if tag['Key'] == 'MD5':
md5 = tag['Value']
return md5
# Evaluate the media using MediaInfo
def get_mediainfo(presignedURL):
print("presignedURL !!!!!!!!!!!!!", presignedURL)
try:
output = subprocess.check_output(['./mediainfo', '--full', '--output=JSON', presignedURL], shell=False, stderr=subprocess.STDOUT)
# subprocess.check_output(["./mediainfo", "--full", "--output=JSON", presignedURL], stdin=None, stderr=subprocess.STDOUT, shell=True)
print("this si the output !!!!!!!!!!!!!!!!!", output)
json_output = json.loads(output)
print("this is the JOSN output!!!!!!", json_output)
# Prepare the short media analysis JSON
json_track = json_output['media']
for f in json_track['track']:
if f['#type'] == 'General':
wrapper = f['Format']
if f['#type'] == 'Video':
wrapperType = f['Format_Profile']
codec = f['Format']
bitRate = f['BitRate']
#frameRate = f['FrameRate']
#som = f['TimeCode_FirstFrame']
width = f['Width']
height = f['Height']
#framecount = f['FrameCount']
#if f['#type'] == 'Other' and f['Type'] == 'Time code':
#som = f['TimeCode_FirstFrame']
mediaResult = {
"Wrapper": wrapper + wrapperType,
"Codec": codec,
"BitRate": bitRate,
#"FrameRate": frameRate,
#"SOM": som,
"Resolution": width + 'X' + height,
#"FrameCount": framecount
}
except Exception as e:
print (e)
raise
return None
return mediaResult
def lambda_handler(event, context):
print("this is the event !!!!!!!!!!!!!!!", event)
bucketName = event['bucketName']
objectName = event['objectKey']
signed_url = get_presigned_url(3600, bucketName, objectName)
print ('CORE:: Media Evaluator:: Presigned URL: ' + signed_url)
mediaAnalysis = get_mediainfo(signed_url)
print('CORE:: Media Evaluator:: Parsed Media Analysis: ' + json.dumps(mediaAnalysis, indent=4))
#md5 = get_md5_from_objectTag(bucketName, objectName)
#print('CORE:: Media Evaluator:: MD5 Value from Object Tag: ' + md5)
ingestData = {
"MediaEvaluation": mediaAnalysis,
#"MD5": md5,
"AssetID": objectName.partition(".")[0],
"AssetUID": uuid.uuid4().hex,
"Bucket": bucketName,
"ObjectKey": objectName
}
print('this is ingestData !!!!!!!!!!!!!!!!!!!!!!!', ingestData)
print('this is mediaAnalysis !!!!!!!!!!!!!!!!!!!!!!!', mediaAnalysis)
#ingestDataJson = json.dumps(ingestData)
#mediaAnalysisJson = json.dumps(mediaAnalysis)
dataNotFake = { 's3url': ingestData.Bucket + '.s3.amazonaws.com/' + ingestData.objectKey,
'Codec': mediaAnalysis.Codec, 'resolution': mediaAnalysis.Resolution}
r = requests.post(url ='<mongobdAtlasUrlConnectStringHere>', data = dataNotFake)
return ingestData
what I am sending the Lambda is,
{
"bucketName": "<s3-bucket-name>",
"objectKey": "<video-file-name>"
}
edit:
the initial response
{
"errorMessage": "'str' object has no attribute 'Bucket'",
"errorType": "AttributeError",
"stackTrace": [
" File \"/var/task/mediaEvaluator.py\", line 107, in lambda_handler\n dataNotFake = { 's3url': ingestDataJson.Bucket + '.s3.amazonaws.com/' + ingestDataJson.objectKey, 'Codec': mediaAnalysisJson.Codec, 'resolution': mediaAnalysisJson.Resolution}\n"
]
}
and, then the logs
7c4Uwzk422qMqIJfcFOZr6kwDasq3AEIy%2F%2F%2F%2F%2F%2F%2F%2F%2F%2F%2FARAAGgw2MTYwODIzMjAyOTEiDHJGza1WClZUa%2BZKEyqwAWPyeYiNirreNeRtXlelQjCxUnEj3HMZiMF2UlPLpzwY3RRNo5LBpieIuIRUm7HtbWqXp19lDsUBHiGSoZqmNW5i5EN5nTPe6c6LNdKYFXYutvd4X1dsVEij8srY0DW%2FjBpiZxGt3DlhLWiDtoA8EjgXEe4JcpvU6Z9EpJeotjFhBzfe%2BM7xoPYE%2BoYS4ipx0nyntPQ4Qia1Cdh9LBwsHbcPL59JeI27lVmkggCFevZXMLqPzIEGOuABHPLi%2B3fu1bxwoDJYaA0HOwnAbF%2FncPMWpIR9NATDHyq%2B6BbaOxFAygyNXC%2FAjjqCEOezv1yfZ0VCMAP9i0Wi%2BBqgL8s4Qbuwk1PdgSfZdwqxrSOynSeX6s7Z5au9QYn%2BY%2F5upVr%2F5dt6Q8veRAWuqEQx4muzEix0jorBm4j1KAmuTYfv3A71Hv9YfhMmbR6h4XZv1U8nQpqNJNIJ%2FC%2BBBbRuXDWMhbfnK6IiXw9e3VWqQa7Esjj0WqHgOZ1wWGLZvqqy5Re%2Bm%2BF9eFdE%2F3mUv516aeU31eZ0gkHxnZGZ6HY%3D&Expires=1613960653', 'track': [{'#type': 'General', 'ID': '1', 'VideoCount': '1', 'MenuCount': '1', 'FileExtension': 'mpg?AWSAccessKeyId=ASIAY64K6FOR54JVBB5W&Signature=b16%2BkSWWYrrZvBlYA5tvs7DsqO4%3D&x-amz-security-token=IQoJb3JpZ2luX2VjEML%2F%2F%2F%2F%2F%2F%2F%2F%2F%2FwEaCXVzLWVhc3QtMSJHMEUCIQCyP7t3a9Y6%2F4brzVhIOH5GEMpkargZlHCnX55tbiJKwQIgdBNqAwljhCiLZbf%2F7c4Uwzk422qMqIJfcFOZr6kwDasq3AEIy%2F%2F%2F%2F%2F%2F%2F%2F%2F%2F%2FARAAGgw2MTYwODIzMjAyOTEiDHJGza1WClZUa%2BZKEyqwAWPyeYiNirreNeRtXlelQjCxUnEj3HMZiMF2UlPLpzwY3RRNo5LBpieIuIRUm7HtbWqXp19lDsUBHiGSoZqmNW5i5EN5nTPe6c6LNdKYFXYutvd4X1dsVEij8srY0DW%2FjBpiZxGt3DlhLWiDtoA8EjgXEe4JcpvU6Z9EpJeotjFhBzfe%2BM7xoPYE%2BoYS4ipx0nyntPQ4Qia1Cdh9LBwsHbcPL59JeI27lVmkggCFevZXMLqPzIEGOuABHPLi%2B3fu1bxwoDJYaA0HOwnAbF%2FncPMWpIR9NATDHyq%2B6BbaOxFAygyNXC%2FAjjqCEOezv1yfZ0VCMAP9i0Wi%2BBqgL8s4Qbuwk1PdgSfZdwqxrSOynSeX6s7Z5au9QYn%2BY%2F5upVr%2F5dt6Q8veRAWuqEQx4muzEix0jorBm4j1KAmuTYfv3A71Hv9YfhMmbR6h4XZv1U8nQpqNJNIJ%2FC%2BBBbRuXDWMhbfnK6IiXw9e3VWqQa7Esjj0WqHgOZ1wWGLZvqqy5Re%2Bm%2BF9eFdE%2F3mUv516aeU31eZ0gkHxnZGZ6HY%3D&Expires=1613960653', 'Format': 'MPEG-TS', 'FileSize': '102004664', 'Duration': '200.236328125', 'OverallBitRate_Mode': 'CBR', 'OverallBitRate': '4075040', 'StreamSize': '7646822', 'extra': {'OverallBitRate_Precision_Min': '4075030', 'OverallBitRate_Precision_Max': '4075051', 'FileExtension_Invalid': 'ts m2t m2s m4t m4s tmf ts tp trp ty'}}, {'#type': 'Video', 'StreamOrder': '0-0', 'ID': '481', 'MenuID': '1', 'Format': 'AVC', 'Format_Profile': 'Main', 'Format_Level': '4.1', 'Format_Settings_CABAC': 'Yes', 'Format_Settings_RefFrames': '1', 'Format_Settings_GOP': 'M=1, N=15', 'CodecID': '27', 'Duration': '194.867', 'BitRate': '3873733', 'Width': '1280', 'Height': '720', 'Sampled_Width': '1280', 'Sampled_Height': '720', 'PixelAspectRatio': '1.000', 'DisplayAspectRatio': '1.778', 'ColorSpace': 'YUV', 'ChromaSubsampling': '4:2:0', 'BitDepth': '8', 'ScanType': 'Progressive', 'Delay': '10.000000', 'StreamSize': '94357842'}, {'#type': 'Menu', 'StreamOrder': '0', 'ID': '480', 'MenuID': '1', 'Format': 'AVC / KLV', 'Duration': '200.236328125', 'Delay': '0.001107222', 'List_StreamKind': '1 / ', 'List_StreamPos': '0 / ', 'extra': {'pointer_field': '0', 'section_length': '55'}}]}}
CORE:: Media Evaluator:: Parsed Media Analysis: {
"Wrapper": "MPEG-TSMain",
"Codec": "AVC",
"BitRate": "3873733",
"Resolution": "1280X720"
}
this is ingestData !!!!!!!!!!!!!!!!!!!!!!! {'MediaEvaluation': {'Wrapper': 'MPEG-TSMain', 'Codec': 'AVC', 'BitRate': '3873733', 'Resolution': '1280X720'}, 'AssetID': 'Day Flight', 'AssetUID': 'cb0651e1151f41a5a890425d41bda3cb', 'Bucket': 'video-uploads-94viwznlhm88', 'ObjectKey': 'Day Flight.mpg'}
this is mediaAnalysis !!!!!!!!!!!!!!!!!!!!!!! {'Wrapper': 'MPEG-TSMain', 'Codec': 'AVC', 'BitRate': '3873733', 'Resolution': '1280X720'}
[ERROR] AttributeError: 'str' object has no attribute 'Bucket'
Traceback (most recent call last):
  File "/var/task/mediaEvaluator.py", line 107, in lambda_handler
    dataNotFake = { 's3url': ingestDataJson.Bucket + '.s3.amazonaws.com/' + ingestDataJson.objectKey, 'Codec': mediaAnalysisJson.Codec, 'resolution': mediaAnalysisJson.Resolution}
END RequestId: bfcf3bb8-d709-44c5-9e42-8c7a6a31a879
REPORT RequestId: bfcf3bb8-d709-44c5-9e42-8c7a6a31a879 Duration: 6679.29 ms Billed Duration: 6680 ms Memory Size: 128 MB Max Memory Used: 104 MB Init Duration: 382.19 ms

I think here
dataNotFake = { 's3url': ingestData.Bucket + '.s3.amazonaws.com/' + ingestData.objectKey,
considering these
ingestData = {'MediaEvaluation': {'Wrapper': 'MPEG-TSMain',
'Codec': 'AVC',
'BitRate': '3873733',
'Resolution': '1280X720'},
'AssetID': 'Day Flight',
'AssetUID': 'cb0651e1151f41a5a890425d41bda3cb',
'Bucket': 'video-uploads-94viwznlhm88',
'ObjectKey': 'Day Flight.mpg'}
mediaAnalysis = {
'Wrapper': 'MPEG-TSMain', 'Codec': 'AVC', 'BitRate': '3873733', 'Resolution': '1280X720'}
You should use something like this
dataNotFake = {
's3url': f'{ingestData["Bucket"]}.s3.amazonaws.com/{ingestData["ObjectKey"]}',
'Codec': mediaAnalysis['Codec'],
'resolution': mediaAnalysis['Resolution']
}

I still cannot drill into ingestData, but I managed to avoid the issue by passing bucketName and objectName to create the s3 URL to link to the upload data in mongo and passing the whole mediaAnalysis to mongo.
dataNotFake = { "s3url": bucketName + '.s3.amazonaws.com/' + objectName, "mediaAnalysis": mediaAnalysis}

Related

web3py EthereumTesterProvider - Basic transaction fails

On the web3py EthereumTesterProvider blockchain, I send 2 ethers from the first test account to the second one. The python program ends normally but the transaction seems to fail :
the status of the transaction receipt ('status': 0), which I guess means failed.
the balance of the 2 accounts is not updated after the transaction.
pip config (windows 10) :
web3py (5.31.3)
eth-tester (0.8.0b3)
Code:
from web3 import Web3, EthereumTesterProvider
from pprint import pprint
w3 = Web3(EthereumTesterProvider())
print(f"{w3.isConnected() = }")
print(f"\n{w3.eth.get_balance(w3.eth.accounts[0]) = }")
print(f"{w3.eth.get_balance(w3.eth.accounts[1]) = }")
tx_hash = w3.eth.send_transaction(
{
'from': w3.eth.accounts[0],
'to': w3.eth.accounts[1],
'value': w3.toWei(2, 'ether'),
'gas': 21000
}
)
print(f"\n{tx_hash = }")
print("\ntx_receipt = ")
tx_receipt = w3.eth.wait_for_transaction_receipt(tx_hash)
pprint(dict(tx_receipt))
print(f"\n{w3.eth.get_balance(w3.eth.accounts[0]) = }")
print(f"{w3.eth.get_balance(w3.eth.accounts[1]) = }")
Traces:
w3.isConnected() = True
w3.eth.get_balance(w3.eth.accounts[0]) = 1000000000000000000000000
w3.eth.get_balance(w3.eth.accounts[1]) = 1000000000000000000000000
tx_hash = HexBytes('0x72345d1c23a10ac3849e1f8e53b517da8200e58ab211ebaf44df732f6f8a29af')
tx_receipt =
{'blockHash': HexBytes('0xadfcec5788a8757a5eed6aeee15c997a6b75a612fa4919a878b02a69d04f8a0c'),
'blockNumber': 1,
'contractAddress': '0xa0Beb7081fDaF3ed157370836A85eeC20CEc9e04',
'cumulativeGasUsed': 21000,
'effectiveGasPrice': 1000000000,
'from': '0xaBbACadABa000000000000000000000000000000',
'gasUsed': 21000,
'logs': [],
'state_root': b'\x00',
'status': 0, <======== failure ==========
'to': '0xaBbACaDaBA000000000000000000000000000001',
'transactionHash': HexBytes('0x72345d1c23a10ac3849e1f8e53b517da8200e58ab211ebaf44df732f6f8a29af'),
'transactionIndex': 0,
'type': '0x2'}
w3.eth.get_balance(w3.eth.accounts[0]) = 1000000000000000000000000
w3.eth.get_balance(w3.eth.accounts[1]) = 1000000000000000000000000
Please also note that this code works with ganache when I change the instance of web3 like that :
w3 = Web3(Web3.HTTPProvider('HTTP://127.0.0.1:8545'))

How to create a valid signature message to POST an order to the Kucoin Future API?

I am trying to place an order but it gives me this error:
{"code":"400005","msg":"Invalid KC-API-SIGN"}
I'll be so thankful if someone check my code and let me know the problem
import requests
import time
import base64
import hashlib
import hmac
import json
import uuid
api_key = 'XXXXXXXXXXXXXXXXXXXXXXX'
api_secret = 'XXXXXXXXXXXXXXXXXXXXXX'
api_passphrase = 'XXXXXXXXXXXXXXX'
future_base_url = "https://api-futures.kucoin.com"
clientOid = uuid.uuid4().hex
params = {
"clientOid": str(clientOid),
"side": str(side),
"symbol": str(symbol),
"type": "limit",
"leverage": "5",
"stop": "down",
"stopPriceType": "TP",
"price": str(price),
"size": int(size),
"stopPrice": str(stopprice)
}
json_params = json.dumps(params)
print(json_params)
now = int(time.time() * 1000)
str_to_sign = str(now) + 'POST' + '/api/v1/orders' + json_params
signature = base64.b64encode(hmac.new(api_secret.encode('utf-8'), str_to_sign.encode('utf-8'), hashlib.sha256).digest())
passphrase = base64.b64encode(hmac.new(api_secret.encode('utf-8'), api_passphrase.encode('utf-8'), hashlib.sha256).digest())
headers = {
"KC-API-SIGN": signature,
"KC-API-TIMESTAMP": str(now),
"KC-API-KEY": api_key,
"KC-API-PASSPHRASE": passphrase,
"KC-API-KEY-VERSION": "2",
"Content-Type": "application/json"
}
response = requests.request('POST', future_base_url + '/api/v1/orders', params=params, headers=headers)
print(response.text)
This worked for me:
tickerK = "AVAXUSDTM"
clientOid = tickerK + '_' + str(now)
side = "buy"
typee = "market"
leverage = "2"
stop = "up"
stopPriceType = "TP"
stopPrice = "12"
size = "3"
# Set the request body
data = {
"clientOid":clientOid,
"side":side,
"symbol":tickerK,
"type":typee,
"leverage":leverage,
"stop":stop,
"stopPriceType":stopPriceType,
"stopPrice":stopPrice,
"size":size
}
data_json = json.dumps(data, separators=(',', ':'))
data_json
url = 'https://api-futures.kucoin.com/api/v1/orders'
now = int(time() * 1000)
str_to_sign = str(now) + 'POST' + '/api/v1/orders' + data_json
signature = base64.b64encode(
hmac.new(api_secret.encode('utf-8'), str_to_sign.encode('utf-8'), hashlib.sha256).digest())
passphrase = base64.b64encode(hmac.new(api_secret.encode('utf-8'), api_passphrase.encode('utf-8'), hashlib.sha256).digest())
headers = {
"KC-API-SIGN": signature,
"KC-API-TIMESTAMP": str(now),
"KC-API-KEY": api_key,
"KC-API-PASSPHRASE": passphrase,
"KC-API-KEY-VERSION": "2",
"Content-Type": "application/json"
}
# Send the POST request
response = requests.request('post', url, headers=headers, data=data_json)
# Print the response
print(response.json())
Please take care of the lines marked in red:
Remove spaces from the json
Add the json to the string to sign
Add content type to the header
Do the request this way

Odoo V11 restapi - TypeError(repr(o) + “ is not JSON serializable”)

Am using odoo v11 integrating with restapi though working fine in CRUD operations when getting report data with error
file.py
def generate_report(self, xml_id, ids):
self_reports = {}
self_reports = {'result': False, 'state': False, 'exception': None}
try:
result, format = request.env.ref(xml_id).sudo().render_qweb_pdf([ids])
if not result:
tb = sys.exc_info()
self_reports['exception'] = odoo.exceptions.DeferredException('RML is not available at specified location or not enough data to print!', tb)
self_reports['result'] = result
self_reports['format'] = format
self_reports['state'] = True
self_reports.update({'id': ids})
except Exception as exception:
_logger.exception('Exception: %s\n', exception)
if hasattr(exception, 'name') and hasattr(exception, 'value'):
self_reports['exception'] = odoo.exceptions.DeferredException(tools.ustr(exception.name), tools.ustr(exception.value))
else:
tb = sys.exc_info()
self_reports['exception'] = odoo.exceptions.DeferredException(tools.exception_to_unicode(exception), tb)
self_reports['state'] = True
exc = self_reports['exception']
if exc:
raise UserError('%s: %s' % (exc.message, exc.traceback))
if self_reports['state']:
if tools.config['reportgz']:
import zlib
res2 = zlib.compress(result)
else:
if isinstance(result, str):
res2 = result.encode('latin1', 'replace')
else:
res2 = result
if res2:
self_reports['result'] = base64.encodestring(res2)
return self_reports
def get_response(self, status_code, status, data=None):
"""Returns Response Object with given status code and status"""
response = Response()
response.status = status
if data:
response.data = isinstance(data, str) and data or json.dumps(data)
response.status_code = status_code
return response
this is data format
list: [{'exception': None, 'state': True, 'id': 3, 'format': 'pdf', 'result':
b'SlZCRVJpMHhMak1LTVNBd0lHOWlhZ284UEFvdlZIbHdaU0F2VUdGblpYTUtMME52ZFc1MElERUtM\nMHRwWkh
NZ1d5QXpJREFnVWlCZApDajQrQ21WdVpHOWlhZ295SURBZ2IySnFDanc4Q2k5UWNtOWtk\nV05sY2lBb1VIbFFSR
Vl5S1FvK1BncGxibVJ2WW1vS015QXdJRzlpCmFnbzhQQW92VW1WemIzVnlZ\nMlZ6SURVZ01DQlNDaTlCYm01dmR
ITWdNVGdnTUNCU0NpOVFZWEpsYm5RZ01TQXdJRklLTDFSNWNH\nVWcKTDFCaFoyVUtMMDFsWkdsaFFtOTRJR
nNnTUNBd0lEWXhNaUEzT1RJZ1hRb3ZRMjl1ZEdWdWRI\nTWdNVGtnTUNCU0NqNCtDbVZ1Wkc5aQphZ28wSUR
BZ2IySnFDanc4Q2k5VWVYQmxJQzlEWVhSaGJH\nOW5DaTlRWVdkbGN5QXhJREFnVWdvK1BncGxibVJ2WW1vS05T
QXdJRzlpCmFnbzhQQW92UTI5c2Iz\nSlRjR0ZqWlNBOFBBb3ZRMU53SUM5RVpYWnBZMlZTUjBJS0wwTlRjR2NnTD
BSbGRtbGpaVWR5WVhr\nS0wxQkQKVTNBZ05pQXdJRklLUGo0S0wwVjRkRWRUZEdGMFpTQThQQW92UjFOaElEY
2dNQ0JTQ2o0\nK0NpOUdiMjUwSUR3OENpOUdPQ0E0SURBZwpVZ292UmpjZ01UTWdNQ0JTQ2o0K0NpOVFZWFI ...
Error Traceback:
File "E:\Odoo\odoo11\addons\restapi\controllers\main.py", line 343, in call_report
return self.get_response(200, str(200), {'report': datas})
File "E:\Odoo\odoo11\addons\restapi\controllers\main.py", line 135, in get_response
response.data = isinstance(data, str) and data or json.dumps(data)
File "C:\Program Files\Python\Python35\lib\json\__init__.py", line 230, in dumps
return _default_encoder.encode(obj)
File "C:\Program Files\Python\Python35\lib\json\encoder.py", line 199, in encode
chunks = self.iterencode(o, _one_shot=True)
File "C:\Program Files\Python\Python35\lib\json\encoder.py", line 257, in iterencode
return _iterencode(o, 0)
File "C:\Program Files\Python\Python35\lib\json\encoder.py", line 180, in default
raise TypeError(repr(o) + " is not JSON serializable")
I need the pdf report in binary data in api response , getting error in response data. Please anyone help me to resolve this
This is because you might you forget to import json lib. In your header declare like import json this might help you to solve your error.

Kinesis Firehose putting JSON objects in S3 without seperator comma

Before sending the data I am using JSON.stringify to the data and it looks like this
{"data": [{"key1": value1, "key2": value2}, {"key1": value1, "key2": value2}]}
But once it passes through AWS API Gateway and Kinesis Firehose puts it to S3 it looks like this
{
"key1": value1,
"key2": value2
}{
"key1": value1,
"key2": value2
}
The seperator comma between the JSON objects are gone but I need it to process data properly.
Template in the API Gateway:
#set($root = $input.path('$'))
{
"DeliveryStreamName": "some-delivery-stream",
"Records": [
#foreach($r in $root.data)
#set($data = "{
""key1"": ""$r.value1"",
""key2"": ""$r.value2""
}")
{
"Data": "$util.base64Encode($data)"
}#if($foreach.hasNext),#end
#end
]
}
I had this same problem recently, and the only answers I was able to find were basically just to add line breaks ("\n") to the end of every JSON message whenever you posted them to the Kinesis stream, or to use a raw JSON decoder method of some sort that can process concatenated JSON objects without delimiters.
I posted a python code solution which can be found over here on a related Stack Overflow post:
https://stackoverflow.com/a/49417680/1546785
One approach you could consider is to configure data processing for your Kinesis Firehose delivery stream by adding a Lambda function as its data processor, which would be executed before finally delivering the data to the S3 bucket.
DeliveryStream:
...
Type: AWS::KinesisFirehose::DeliveryStream
Properties:
DeliveryStreamType: DirectPut
ExtendedS3DestinationConfiguration:
...
BucketARN: !GetAtt MyDeliveryBucket.Arn
ProcessingConfiguration:
Enabled: true
Processors:
- Parameters:
- ParameterName: LambdaArn
ParameterValue: !GetAtt MyTransformDataLambdaFunction.Arn
Type: Lambda
...
And in the Lambda function, ensure that '\n' is appended to the record's JSON string, see below the Lambda function myTransformData.ts in Node.js:
import {
FirehoseTransformationEvent,
FirehoseTransformationEventRecord,
FirehoseTransformationHandler,
FirehoseTransformationResult,
FirehoseTransformationResultRecord,
} from 'aws-lambda';
const createDroppedRecord = (
recordId: string
): FirehoseTransformationResultRecord => {
return {
recordId,
result: 'Dropped',
data: Buffer.from('').toString('base64'),
};
};
const processData = (
payloadStr: string,
record: FirehoseTransformationEventRecord
) => {
let jsonRecord;
// ...
// Process the orginal payload,
// And create the record in JSON
return jsonRecord;
};
const transformRecord = (
record: FirehoseTransformationEventRecord
): FirehoseTransformationResultRecord => {
try {
const payloadStr = Buffer.from(record.data, 'base64').toString();
const jsonRecord = processData(payloadStr, record);
if (!jsonRecord) {
console.error('Error creating json record');
return createDroppedRecord(record.recordId);
}
return {
recordId: record.recordId,
result: 'Ok',
// Ensure that '\n' is appended to the record's JSON string.
data: Buffer.from(JSON.stringify(jsonRecord) + '\n').toString('base64'),
};
} catch (error) {
console.error('Error processing record ${record.recordId}: ', error);
return createDroppedRecord(record.recordId);
}
};
const transformRecords = (
event: FirehoseTransformationEvent
): FirehoseTransformationResult => {
let records: FirehoseTransformationResultRecord[] = [];
for (const record of event.records) {
const transformed = transformRecord(record);
records.push(transformed);
}
return { records };
};
export const handler: FirehoseTransformationHandler = async (
event,
_context
) => {
const transformed = transformRecords(event);
return transformed;
};
Once the newline delimiter is in place, AWS services such as Athena will be able to work properly with the JSON record data in the S3 bucket, not just seeing the first JSON record only.
Once AWS Firehose dumps the JSON objects to s3, it's perfectly possible to read the individual JSON objects from the files.
Using Python you can use the raw_decode function from the json package
from json import JSONDecoder, JSONDecodeError
import re
import json
import boto3
NOT_WHITESPACE = re.compile(r'[^\s]')
def decode_stacked(document, pos=0, decoder=JSONDecoder()):
while True:
match = NOT_WHITESPACE.search(document, pos)
if not match:
return
pos = match.start()
try:
obj, pos = decoder.raw_decode(document, pos)
except JSONDecodeError:
# do something sensible if there's some error
raise
yield obj
s3 = boto3.resource('s3')
obj = s3.Object("my-bukcet", "my-firehose-json-key.json")
file_content = obj.get()['Body'].read()
for obj in decode_stacked(file_content):
print(json.dumps(obj))
# { "key1":value1,"key2":value2}
# { "key1":value1,"key2":value2}
source: https://stackoverflow.com/a/50384432/1771155
Using Glue / Pyspark you can use
import json
rdd = sc.textFile("s3a://my-bucket/my-firehose-file-containing-json-objects")
df = rdd.map(lambda x: json.loads(x)).toDF()
df.show()
source: https://stackoverflow.com/a/62984450/1771155
please use this code to solve your issue
__Author__ = "Soumil Nitin Shah"
import json
import boto3
import base64
class MyHasher(object):
def __init__(self, key):
self.key = key
def get(self):
keys = str(self.key).encode("UTF-8")
keys = base64.b64encode(keys)
keys = keys.decode("UTF-8")
return keys
def lambda_handler(event, context):
output = []
for record in event['records']:
payload = base64.b64decode(record['data'])
"""Get the payload from event bridge and just get data attr """""
serialize_payload = str(json.loads(payload)) + "\n"
hasherHelper = MyHasher(key=serialize_payload)
hash = hasherHelper.get()
output_record = {
'recordId': record['recordId'],
'result': 'Ok',
'data': hash
}
print("output_record", output_record)
output.append(output_record)
return {'records': output}

Grails - JSON binding causing JSONException

I have the following controller code:
def save(MyModel model) {
model.save()
}
And I'm testing it using:
//e.g. 2ff59e55-ee3d-4f66-8bfa-00f355f52c49
def uuid = UUID.randomUUID.toString()
controller.request.contentType = JSON_CONTENT_TYPE
controller.request.method = 'POST'
controller.request.json = "{'uuid': '$uuid', 'description': 'test object', 'count': 1}"
controller.save()
However, every time I run the test I get,
org.apache.commons.lang.UnhandledException:
org.codehaus.groovy.grails.web.converters.exceptions.ConverterException:
org.codehaus.groovy.grails.web.json.JSONException: Value out of sequence: expected mode to be
OBJECT or ARRAY when writing '{'uuid': '2ff59e55-ee3d-4f66-8bfa-00f355f52c49', 'description': 'test object', 'count': 1}' but was INIT
The JSON converter chokes on Groovy Strings. I've solved this by slapping a .toString() on the end: "{'uuid':'$uuid'}".toString().
Try this
void "Test the save action correctly persists an instance"() {
when:
Integer originalCount = MyModel.count()
String uuid = UUID.randomUUID().toString()
controller.request.contentType = 'application/json'
controller.request.method = 'POST'
controller.request.json = ['uuid': uuid, 'description': 'test object'] as JSON
controller.save()
then:
assert originalCount + 1 == MyModel.count()
}