Why does Pycryptodome MAC check fail when encrypting and decrypting JSON files?

I am trying to encrypt some JSON data with AES-256, using a password hashed with pbkdf2_sha256 as the key. I want to store the data in a file, load it, decrypt it, alter it, re-encrypt it, store it again, and repeat.
I am using the passlib and pycryptodome libraries with Python 3.8. The following test runs inside a Docker container and throws an error I haven't been able to correct.
Does anyone have any clues on how I can improve my code (and knowledge)?
Test.py:
import os, json
from Crypto.PublicKey import RSA
from Crypto.Cipher import AES
from passlib.hash import pbkdf2_sha256

def setJsonData(jsonData, jsonFileName):
    with open(jsonFileName, 'wb') as jsonFile:
        password = 'd'
        key = pbkdf2_sha256.hash(password)[-16:]
        data = json.dumps(jsonData).encode("utf8")
        cipher = AES.new(key.encode("utf8"), AES.MODE_EAX)
        ciphertext, tag = cipher.encrypt_and_digest(data)
        [ jsonFile.write(x) for x in (cipher.nonce, tag, ciphertext) ]

def getJsonData(jsonFileName):
    with open(jsonFileName, 'rb') as jsonFile:
        password = 'd'
        key = pbkdf2_sha256.hash(password)[-16:]
        nonce, tag, ciphertext = [ jsonFile.read(x) for x in (16, 16, -1) ]
        cipher = AES.new(key.encode("utf8"), AES.MODE_EAX, nonce)
        data = cipher.decrypt_and_verify(ciphertext, tag)
        return json.loads(data)
dictTest = {}
dictTest['test'] = 1
print(str(dictTest))
setJsonData(dictTest, "test")
dictTest = getJsonData("test")
print(str(dictTest))
Output:
{'test': 1}
Traceback (most recent call last):
  File "test.py", line 37, in <module>
    dictTest = getJsonData("test")
  File "test.py", line 24, in getJsonData
    data = cipher.decrypt_and_verify(ciphertext, tag)
  File "/usr/local/lib/python3.8/site-packages/Crypto/Cipher/_mode_eax.py", line 368, in decrypt_and_verify
    self.verify(received_mac_tag)
  File "/usr/local/lib/python3.8/site-packages/Crypto/Cipher/_mode_eax.py", line 309, in verify
    raise ValueError("MAC check failed")
ValueError: MAC check failed
Research:
Looked into this answer, but I believe my verify() call is in the right place.
I noted that in the python docs, it says:
loads(dumps(x)) != x if x has non-string keys.
but when I re-run the test with dictTest['test'] = 'a' I get the same error.
I suspected the problem was the JSON formatting, so I repeated the test with a plain string, skipping the json.loads and json.dumps calls, but I got the same error.

The problem here is that key = pbkdf2_sha256.hash(password)[-16:] hashes the password with a new random salt on every call, so the key derived for decryption differs from the one used for encryption. The two ciphers therefore disagree, yielding different data, and the integrity (MAC) check fails.
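You can see the moving target directly, since passlib generates a fresh salt inside each hash() call; a quick check (the printed values are illustrative, they change every run):
from passlib.hash import pbkdf2_sha256

# Two calls with the same password yield different strings,
# because each call embeds a newly generated random salt.
print(pbkdf2_sha256.hash('d')[-16:])  # e.g. 'Jx0EccE9cy0kfmWA'
print(pbkdf2_sha256.hash('d')[-16:])  # a different 16 characters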
I changed my key derivation function to the following (SHA3_256 comes from Crypto.Hash):
from Crypto.Hash import SHA3_256

h = SHA3_256.new()
h.update(password.encode("utf-8"))
key = h.digest()
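If you would rather keep a real key-stretching KDF instead of a plain hash, another option (a sketch, not from the original answer) is PyCryptodome's own PBKDF2 with a random salt stored in the file alongside the nonce and tag, so decryption can re-derive the identical key:
from Crypto.Protocol.KDF import PBKDF2
from Crypto.Random import get_random_bytes

def derive_key(password, salt):
    # Same password + same salt -> same 32-byte AES-256 key on both sides.
    return PBKDF2(password.encode("utf8"), salt, dkLen=32, count=100000)

salt = get_random_bytes(16)   # generated once, at encryption time
key = derive_key('d', salt)   # write salt into the file header next to nonce/tag,
                              # then read it back and re-derive at decryption time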


Convert a Bytes String to Dictionary in Python

Basic Information
I am creating a python script that can encrypt and decrypt a file with previous session data.
The Problem
I am able to decrypt my file and read it using a key. This returns a bytes string, which I can in turn convert to a string. However, this string needs to be converted to a dictionary, which I cannot do: using ast, json, and eval I have run into errors.
Bytes string
decrypted = fernet.decrypt(encrypted)
String
string = decrypted.decode("UTF-8").replace("'", '"')
If I use eval() or ast.literal_eval() I get one error, and when I try json.loads() I get another (both errors were posted as screenshots and are not reproduced here).
The information blocked out in both images is there to protect my SSH connections. In the first image the SyntaxError points at the last digit of my IP address.
The Function
The function that is responsible for this when called looks like this:
def FileDecryption():
    with open('enc_key.key', 'rb') as filekey:
        key = filekey.read()
    fernet = Fernet(key)
    with open('saved_data.txt', 'rb') as enc_file:
        encrypted = enc_file.read()
    decrypted = fernet.decrypt(encrypted)
    print(decrypted)
    string = decrypted.decode("UTF-8").replace("'", '"')
    data = f'{string}'
    print(data)
    #data = eval(data)
    data = json.loads(data)
    print(type(data))
    for key in data:
        #command_string = ["load", data[key][1], data[key][2], data[key][3], data[key][4]]
        #SSH.CreateSSH(command_string)
        print(key)
Any help would be appreciated. Thanks!
Your data seems like it was written incorrectly in the first place, but without a complete example it's hard to say.
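For illustration, here is a guess at the likely failure mode (the saving code isn't shown, so this is an assumption): if the session data was originally written with str(dict) rather than json.dumps, Python's repr uses single quotes, which json.loads rejects:
import json

d = {"host": "10.0.0.1", "port": 22}
s = str(d)                         # "{'host': '10.0.0.1', 'port': 22}" - not valid JSON
try:
    json.loads(s)
except json.JSONDecodeError as e:
    print("json.loads failed:", e)
print(json.loads(json.dumps(d)))   # round-trips cleanly when written with json.dumps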
Here's a complete example that round-trips a JSON-able data object.
# requirement:
# pip install cryptography
from cryptography.fernet import Fernet
import json

def encrypt(data, data_filename, key_filename):
    key = Fernet.generate_key()
    with open(key_filename, 'wb') as file:
        file.write(key)
    fernet = Fernet(key)
    encrypted = fernet.encrypt(json.dumps(data).encode())
    with open(data_filename, 'wb') as file:
        file.write(encrypted)

def decrypt(data_filename, key_filename):
    with open(key_filename, 'rb') as file:
        key = file.read()
    fernet = Fernet(key)
    with open(data_filename, 'rb') as file:
        return json.loads(fernet.decrypt(file.read()))

data = {'key1': 'value1', 'key2': 'value2'}
encrypt(data, 'saved_data.txt', 'enc_key.key')
decrypted = decrypt('saved_data.txt', 'enc_key.key')
print(decrypted)
Output:
{'key1': 'value1', 'key2': 'value2'}

Iterating through describe_instances() to print key & value boto3

I am currently working on a Python script to print pieces of information about running EC2 instances on AWS using Boto3. I am trying to print the InstanceId, InstanceType, and PublicIp. I looked through Boto3's documentation and example scripts, so this is what I am using:
import boto3

ec2client = boto3.client('ec2')
response = ec2client.describe_instances()
for reservation in response["Reservations"]:
    for instance in reservation["Instances"]:
        instance_id = instance["InstanceId"]
        instance_type = instance["InstanceType"]
        instance_ip = instance["NetworkInterfaces"][0]["Association"]
        print(instance)
        print(instance_id)
        print(instance_type)
        print(instance_ip)
When I run this, print(instance) prints one large block of JSON along with my instance ID and type, but I have been getting an error since adding NetworkInterfaces. The line
instance_ip = instance["NetworkInterfaces"][0]["Association"]
returns:
Traceback (most recent call last):
  File "/Users/me/AWS/describeInstances.py", line 12, in <module>
    instance_ip = instance["NetworkInterfaces"][0]["Association"]
KeyError: 'Association'
What am I doing wrong while trying to print the PublicIp?
Here is the structure of NetworkInterfaces for reference (posted as an image in the original question):
The full Response Syntax for reference can be found here (https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/ec2.html#EC2.Client.describe_instances)
Association may not always be present, and an instance may have more than one interface. So your working loop could be:
for reservation in response["Reservations"]:
    for instance in reservation["Instances"]:
        instance_id = instance["InstanceId"]
        instance_type = instance["InstanceType"]
        #print(instance)
        print(instance_id, instance_type)
        for network_interface in instance["NetworkInterfaces"]:
            instance_ip = network_interface.get("Association", "no-association")
            print(' -', instance_ip)
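If you want just the public IP rather than the whole Association dict, you can go one level deeper; a small variation on the inner loop (PublicIp is the field name from the describe_instances response syntax linked above):
for network_interface in instance["NetworkInterfaces"]:
    # chain .get() so a missing Association or PublicIp falls back gracefully
    public_ip = network_interface.get("Association", {}).get("PublicIp", "no-public-ip")
    print(' -', public_ip)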

reading JSON from file and extract the keys returns attribute str has no keys

I am new to Python (and JSON), so apologies if this is obvious to you.
I pull some data from an API using the following code
import requests
import json

headers = {'Content-Type': 'application/json', 'accept-encoding': 'identity'}
api_url = api_url_base + api_token + api_request  # variables removed for security
response = requests.get(api_url, headers=headers)
data = response.json()
keys = data.keys
if response.status_code == 200:
    print(data["message"], "saving to file...")
    print("Found the following keys:")
    print(keys)
    with open('vulns.json', 'w') as outfile:
        json.dump(response.content.decode('utf-8'), outfile)
    print("File Saved.")
else:
    print('The site returned a', response.status_code, 'error')
This works: I get some data returned and I am able to write the file.
I am trying to change what's returned from a short format to a long format, and to check it's working I need to see the keys. I was trying to do this offline using the written file (as practice for reading JSON from files).
I wrote these few lines (taken from this site https://www.kite.com/python/answers/how-to-print-the-keys-of-a-dictionary-in-python)
import json

with open('vulns.json') as json_file:
    data = json.load(json_file)
    print(data)
    keys = list(data.keys())
    print(keys)
Unfortunately, whenever I run this it returns this error:
Python 3.9.1 (tags/v3.9.1:1e5d33e, Dec 7 2020, 17:08:21) [MSC v.1927 64 bit (AMD64)] on win32
Type "help", "copyright", "credits" or "license" for more information.
>>> print(keys)
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
NameError: name 'keys' is not defined
>>> & C:/Users/xxxx/AppData/Local/Microsoft/WindowsApps/python.exe c:/Temp/read-vulnfile.py
  File "<stdin>", line 1
    & C:/Users/xxxx/AppData/Local/Microsoft/WindowsApps/python.exe c:/Temp/read-vulnfile.py
    ^
SyntaxError: invalid syntax
>>> exit()
PS C:\Users\xxxx\Documents\scripts\Python> & C:/Users/xxx/AppData/Local/Microsoft/WindowsApps/python.exe c:/Temp/read-vulnfile.py
Traceback (most recent call last):
  File "c:\Temp\read-vulnfile.py", line 6, in <module>
    keys=list(data.keys)
AttributeError: 'str' object has no attribute 'keys'
The print(data) call returns what looks like JSON; this is the opening line:
{"count": 1000, "message": "Vulnerabilities found: 1000", "data":
[{"...
I can't show the content as it's sensitive.
Why is this reading a str object rather than a dictionary?
How do I read JSON back into a dictionary, please?
You just have that content stored in the file as a string. Open vulns.json in an editor and you will most likely see one long quoted, escaped string like "{\"count\": 1000, ... instead of {"count": 1000, ....
It is parsed by json.load, but since the top-level JSON value is a string, it decodes to a Python str (see this table).
So take one step back and look at what happens while saving the file: you take some content from your response, but dump the decoded string value into the file. Instead, try
json.dump(response.json(), outfile)
(or just use the data variable you have already assigned).
This should allow you to successfully dump and load the data as a dict.
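To make the difference concrete, here is a small self-contained demonstration (the payload and filenames are made up for illustration); json.dumps(payload) stands in for the raw response text that the original code dumped:
import json

payload = {"count": 1000, "message": "Vulnerabilities found: 1000"}

# What the original code effectively did: dump the already-serialized *string*.
with open('as_string.json', 'w') as f:
    json.dump(json.dumps(payload), f)    # file holds one quoted, escaped string

# What you want: dump the parsed object itself.
with open('as_dict.json', 'w') as f:
    json.dump(payload, f)                # file holds {"count": 1000, ...}

with open('as_string.json') as f:
    print(type(json.load(f)))            # <class 'str'>  -> .keys() fails
with open('as_dict.json') as f:
    print(type(json.load(f)))            # <class 'dict'> -> .keys() works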

Convert a pipeline_pb2.TrainEvalPipelineConfig to JSON or YAML file for tensorflow object detection API

I want to convert a pipeline_pb2.TrainEvalPipelineConfig to JSON or YAML file format for the TensorFlow Object Detection API. I tried converting the protobuf file using:
import tensorflow as tf
from google.protobuf import text_format
import yaml
from object_detection.protos import pipeline_pb2

def get_configs_from_pipeline_file(pipeline_config_path, config_override=None):
    '''
    read .config and convert it to proto_buffer_object
    '''
    pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
    with tf.gfile.GFile(pipeline_config_path, "r") as f:
        proto_str = f.read()
        text_format.Merge(proto_str, pipeline_config)
    if config_override:
        text_format.Merge(config_override, pipeline_config)
    #print(pipeline_config)
    return pipeline_config

def create_configs_from_pipeline_proto(pipeline_config):
    '''
    Returns the configurations as dictionary
    '''
    configs = {}
    configs["model"] = pipeline_config.model
    configs["train_config"] = pipeline_config.train_config
    configs["train_input_config"] = pipeline_config.train_input_reader
    configs["eval_config"] = pipeline_config.eval_config
    configs["eval_input_configs"] = pipeline_config.eval_input_reader
    # Keeps eval_input_config only for backwards compatibility. All clients should
    # read eval_input_configs instead.
    if configs["eval_input_configs"]:
        configs["eval_input_config"] = configs["eval_input_configs"][0]
    if pipeline_config.HasField("graph_rewriter"):
        configs["graph_rewriter_config"] = pipeline_config.graph_rewriter
    return configs

configs = get_configs_from_pipeline_file('pipeline.config')
config_as_dict = create_configs_from_pipeline_proto(configs)
But when I try converting this returned dictionary to YAML with yaml.dump(config_as_dict) it says
TypeError: can't pickle google.protobuf.pyext._message.RepeatedCompositeContainer objects
For json.dumps(config_as_dict) it says:
Traceback (most recent call last):
  File "config_file_parsing.py", line 48, in <module>
    config_as_json = json.dumps(config_as_dict)
  File "/usr/lib/python3.5/json/__init__.py", line 230, in dumps
    return _default_encoder.encode(obj)
  File "/usr/lib/python3.5/json/encoder.py", line 198, in encode
    chunks = self.iterencode(o, _one_shot=True)
  File "/usr/lib/python3.5/json/encoder.py", line 256, in iterencode
    return _iterencode(o, 0)
  File "/usr/lib/python3.5/json/encoder.py", line 179, in default
    raise TypeError(repr(o) + " is not JSON serializable")
TypeError: label_map_path: "label_map.pbtxt"
shuffle: true
tf_record_input_reader {
  input_path: "dataset.record"
}
is not JSON serializable
Would appreciate some help here.
JSON can only dump a subset of the Python primitives, plus dict and list collections (with limitations on self-referencing).
YAML is more powerful and can be used to dump arbitrary Python objects, but only if those objects can be "investigated" during the representation phase of the dump, which essentially limits that to instances of pure Python classes. For objects created at the C level, one can write explicit dumpers; if none is available, Python will try to use the pickle protocol to dump the data to YAML.
Inspecting protobuf on PyPI shows that there are non-generic wheels available, which is usually an indication of C code optimization, and inspecting one of those files indeed shows a pre-compiled shared object.
Although you make a dict out of the config, that dict can only be dumped when all its keys and all its values can be dumped. Since your keys are strings (necessary for JSON), you need to look at each of the values, find the one that doesn't dump, and convert it to a dumpable object structure (dict/list for JSON, pure Python class for YAML).
You might want to take a look at the google.protobuf.json_format module.
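For example, json_format can turn the whole message into plain Python primitives that both json and yaml handle; a minimal sketch, assuming pipeline_config is the proto returned by get_configs_from_pipeline_file above:
import json
import yaml
from google.protobuf import json_format

# MessageToDict recursively converts the proto (including repeated fields
# like eval_input_reader) into dicts, lists, and scalars.
config_dict = json_format.MessageToDict(pipeline_config)

with open('pipeline.json', 'w') as f:
    json.dump(config_dict, f, indent=2)

with open('pipeline.yaml', 'w') as f:
    yaml.safe_dump(config_dict, f, default_flow_style=False)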

How to use mock_open with json.load()?

I'm trying to get a unit test working that validates a function that reads credentials from a JSON-encoded file. Since the credentials themselves aren't fixed, the unit test needs to provide some and then test that they are correctly retrieved.
Here is the credentials function:
def read_credentials():
    basedir = os.path.dirname(__file__)
    with open(os.path.join(basedir, "authentication.json")) as f:
        data = json.load(f)
    return data["bot_name"], data["bot_password"]
and here is the test:
def test_credentials(self):
    with patch("builtins.open", mock_open(
        read_data='{"bot_name": "name", "bot_password": "password"}\n'
    )):
        name, password = shared.read_credentials()
    self.assertEqual(name, "name")
    self.assertEqual(password, "password")
However, when I run the test, the json code blows up with a decode error. Looking at the json source itself, I'm struggling to see why the mocked test fails, because json.load(f) simply calls f.read() and then json.loads().
Indeed, if I change my authentication function to the following, the unit test works:
def read_credentials():
    # Read the authentication file from the current directory and create a
    # HTTPBasicAuth object that can then be used for future calls.
    basedir = os.path.dirname(__file__)
    with open(os.path.join(basedir, "authentication.json")) as f:
        content = f.read()
        data = json.loads(content)
    return data["bot_name"], data["bot_password"]
I don't necessarily mind leaving my code in this form, but I'd like to understand if I've got something wrong in my test that would allow me to keep my function in its original form.
Stack trace:
Traceback (most recent call last):
  File "test_shared.py", line 56, in test_credentials
    shared.read_credentials()
  File "shared.py", line 60, in read_credentials
    data = json.loads(content)
  File "/home/philip/.local/share/virtualenvs/atlassian-webhook-basic-3gOncDp4/lib/python3.6/site-packages/flask/json/__init__.py", line 205, in loads
    return _json.loads(s, **kwargs)
  File "/usr/lib/python3.6/json/__init__.py", line 367, in loads
    return cls(**kw).decode(s)
  File "/usr/lib/python3.6/json/decoder.py", line 339, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/usr/lib/python3.6/json/decoder.py", line 357, in raw_decode
    raise JSONDecodeError("Expecting value", s, err.value) from None
json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)
I had the same issue and got around it by mocking json.load and builtins.open:
import json
from unittest.mock import patch, MagicMock

# I don't care about the actual open
p1 = patch( "builtins.open", MagicMock() )

m = MagicMock( side_effect = [ { "foo": "bar" } ] )
p2 = patch( "json.load", m )

with p1 as p_open:
    with p2 as p_json_load:
        f = open( "filename" )
        print( json.load( f ) )
Result:
{'foo': 'bar'}
I had the exact same issue and solved it. Full code below, first the function to test, then the test itself.
The original function I want to test loads a json file that is structured like a dictionary, and checks to see if there's a specific key-value pair in it:
def check_if_file_has_real_data(filepath):
    with open(filepath, "r") as f:
        data = json.load(f)
    if "fake" in data["the_data"]:
        return False
    else:
        return True
But I want to test this without loading any actual file, exactly as you describe. Here's how I solved it:
from my_module import check_if_file_has_real_data
import mock

@mock.patch("my_module.json.load")
@mock.patch("my_module.open")
def test_check_if_file_has_real_data(mock_open, mock_json_load):
    mock_json_load.return_value = dict({"the_data": "This is fake data"})
    assert check_if_file_has_real_data("filepath") == False

    mock_json_load.return_value = dict({"the_data": "This is real data"})
    assert check_if_file_has_real_data("filepath") == True
The mock_open object isn't called explicitly in the test function, but if you don't include that decorator and argument, you get a filepath error when the with open(...) part of check_if_file_has_real_data tries to run with the real open function instead of the MagicMock that has been patched in.
Then you overwrite the return value of the json.load mock with whatever you want to test.
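For completeness, the same idea can be written with context managers against the read_credentials function from the question; a minimal sketch, assuming the module under test is importable as shared:
import json
from unittest.mock import patch

import shared  # the module containing read_credentials, as in the question

def test_credentials():
    # Patch open and json.load together, so no real file is opened or parsed.
    # The MagicMock returned by the patched open supports the context-manager
    # protocol, so "with open(...) as f" works without further setup.
    with patch("builtins.open"), patch("json.load") as mock_json_load:
        mock_json_load.return_value = {"bot_name": "name", "bot_password": "password"}
        name, password = shared.read_credentials()
        assert name == "name"
        assert password == "password"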