[WIP] Improving body serialization
Leguark committed May 23, 2025
commit 122669cd237f650b45dde7a350086dafcbe0260d
52 changes: 43 additions & 9 deletions gempy/modules/serialization/save_load.py
@@ -1,18 +1,39 @@
import json

import numpy as np

from ...core.data import GeoModel
from ...core.data.encoders.converters import loading_model_injection
from ...optional_dependencies import require_zlib


def save_model(model: GeoModel, path: str):
    import zlib

    # TODO: Serialize to json
    model_json = model.model_dump_json(by_alias=True, indent=4)

    # TODO: Serialize to binary
    sp_binary = model.structural_frame.surface_points_copy.data.tobytes()
    data: np.ndarray = model.structural_frame.surface_points_copy.data
    sp_binary = data.tobytes()
    ori_binary = model.structural_frame.orientations_copy.data.tobytes()

    # Compress the binary data
    compressed_binary = zlib.compress(sp_binary + ori_binary)

    # Add compression info to metadata
    model_dict = model.model_dump(by_alias=True)
    model_dict["_binary_metadata"] = {
            "sp_shape"   : model.structural_frame.surface_points_copy.data.shape,
            "sp_dtype"   : str(model.structural_frame.surface_points_copy.data.dtype),
            "ori_shape"  : model.structural_frame.orientations_copy.data.shape,
            "ori_dtype"  : str(model.structural_frame.orientations_copy.data.dtype),
            "compression": "zlib",
            "sp_length"  : len(sp_binary)  # Need this to split the arrays after decompression
    }

    # TODO: Putting both together
    binary_file = _to_binary(model_json, sp_binary + ori_binary)
    binary_file = _to_binary(model_json, compressed_binary)
    with open(path, 'wb') as f:
        f.write(binary_file)

@@ -25,22 +46,35 @@ def load_model(path: str) -> GeoModel:

    # Split header and body
    header_json = binary_file[4:4 + header_length].decode('utf-8')
    body = binary_file[4 + header_length:]
    header_dict = json.loads(header_json)

    # Split body into surface points and orientations
    # They are equal size so we can split in half
    sp_binary = body[:len(body) // 2]
    ori_binary = body[len(body) // 2:]
    metadata = header_dict.pop("_binary_metadata")

    # Decompress the binary data
    ori_data, sp_data = _foo(binary_file, header_length, metadata)

    with loading_model_injection(
            surface_points_binary=sp_binary,
            orientations_binary=ori_binary
            surface_points_binary=sp_data,
            orientations_binary=ori_data
    ):
        model = GeoModel.model_validate_json(header_json)

    return model


def _foo(binary_file, header_length, metadata):
    zlib = require_zlib()
    body = binary_file[4 + header_length:]
    decompressed_binary = zlib.decompress(body)
    # Split the decompressed data using the stored length
    sp_binary = decompressed_binary[:metadata["sp_length"]]
    ori_binary = decompressed_binary[metadata["sp_length"]:]
    # Reconstruct arrays
    sp_data = np.frombuffer(sp_binary, dtype=np.dtype(metadata["sp_dtype"])).reshape(metadata["sp_shape"])
    ori_data = np.frombuffer(ori_binary, dtype=np.dtype(metadata["ori_dtype"])).reshape(metadata["ori_shape"])
    return ori_data, sp_data
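For orientation (not part of the diff): the helper above, still carrying the WIP placeholder name `_foo`, undoes exactly what `save_model` builds — decompress the body, split it at the stored `sp_length`, and rebuild each array from its recorded dtype and shape. A minimal, self-contained sketch of that round trip with made-up arrays (every name below is hypothetical):

import zlib
import numpy as np

sp = np.arange(12, dtype=np.float64).reshape(4, 3)   # stand-in for surface points
ori = np.arange(8, dtype=np.float64).reshape(2, 4)   # stand-in for orientations

meta = {
    "sp_shape": sp.shape, "sp_dtype": str(sp.dtype),
    "ori_shape": ori.shape, "ori_dtype": str(ori.dtype),
    "sp_length": len(sp.tobytes()),  # byte offset at which the two buffers are split
}
blob = zlib.compress(sp.tobytes() + ori.tobytes())

raw = zlib.decompress(blob)
sp_back = np.frombuffer(raw[:meta["sp_length"]], dtype=meta["sp_dtype"]).reshape(meta["sp_shape"])
ori_back = np.frombuffer(raw[meta["sp_length"]:], dtype=meta["ori_dtype"]).reshape(meta["ori_shape"])
assert np.array_equal(sp, sp_back) and np.array_equal(ori, ori_back)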


def _to_binary(header_json, body_) -> bytes:
    header_json_bytes = header_json.encode('utf-8')
    header_json_length = len(header_json_bytes)
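The diff truncates `_to_binary` at this point. Judging from how `load_model` reads the file back — four bytes giving the header length, then the UTF-8 JSON header, then the compressed body — the packing step plausibly looks like the sketch below; the 4-byte little-endian length prefix is an assumption and `_pack_sketch` is a hypothetical stand-in, not the actual helper:

import struct

def _pack_sketch(header_json: str, body_: bytes) -> bytes:
    # Assumed layout: [4-byte header length][UTF-8 JSON header][compressed binary body]
    header_bytes = header_json.encode('utf-8')
    return struct.pack('<I', len(header_bytes)) + header_bytes + body_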
9 changes: 8 additions & 1 deletion gempy/optional_dependencies.py
@@ -58,4 +58,11 @@ def require_subsurface():
        import subsurface
    except ImportError:
        raise ImportError("The subsurface package is required to run this function.")
    return subsurface
    return subsurface

def require_zlib():
    try:
        import zlib
    except ImportError:
        raise ImportError("The zlib package is required to run this function.")
    return zlib
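Since `zlib` is part of the Python standard library, this guard should essentially always succeed; it simply mirrors the existing `require_*` helpers. A typical call site, as in `_foo` above:

zlib = require_zlib()
compressed = zlib.compress(b"example payload")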
3 changes: 2 additions & 1 deletion requirements/optional-requirements.txt
@@ -5,4 +5,5 @@ gempy_plugins
# for data download
pooch
scipy
scikit-image
scikit-image
zlib
24 changes: 13 additions & 11 deletions test/test_modules/test_serialize_model.py
@@ -26,15 +26,7 @@ def test_generate_horizontal_stratigraphic_model():
    ):
        model_deserialized = gp.data.GeoModel.model_validate_json(model_json)

    a = hash(model.structural_frame.structural_elements[1].surface_points.data.tobytes())
    b = hash(model_deserialized.structural_frame.structural_elements[1].surface_points.data.tobytes())

    o_a = hash(model.structural_frame.structural_elements[1].orientations.data.tobytes())
    o_b = hash(model_deserialized.structural_frame.structural_elements[1].orientations.data.tobytes())

    assert a == b, "Hashes for surface points are not equal"
    assert o_a == o_b, "Hashes for orientations are not equal"
    assert model_deserialized.__str__() == model.__str__()
    _validate_serialization(model, model_deserialized)

    # # Validate json against schema
    if True:
@@ -43,15 +35,25 @@ def test_generate_horizontal_stratigraphic_model():
        verify_model = json.loads(model_json)
        verify_model["meta"]["creation_date"] = "<DATE_IGNORED>"
        verify_json(json.dumps(verify_model, indent=4), name="verify/Horizontal Stratigraphic Model serialization")



def _validate_serialization(original_model, model_deserialized):
    a = hash(original_model.structural_frame.structural_elements[1].surface_points.data.tobytes())
    b = hash(model_deserialized.structural_frame.structural_elements[1].surface_points.data.tobytes())
    o_a = hash(original_model.structural_frame.structural_elements[1].orientations.data.tobytes())
    o_b = hash(model_deserialized.structural_frame.structural_elements[1].orientations.data.tobytes())
    assert a == b, "Hashes for surface points are not equal"
    assert o_a == o_b, "Hashes for orientations are not equal"
    assert model_deserialized.__str__() == original_model.__str__()


def test_save_model_to_disk():
    model = gp.generate_example_model(ExampleModel.COMBINATION, compute_model=False)
    save_model(model, "temp/test_save_model_to_disk.json")

    # Load the model from disk
    loaded_model = load_model("temp/test_save_model_to_disk.json")
    assert loaded_model.__str__() == model.__str__()
    _validate_serialization(model, loaded_model)


