Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
[WIP] Deserialize binary seems to run
  • Loading branch information
Leguark committed May 23, 2025
commit 0211f9632b5cdcf146a19c06a6a3e717ff41e3bb
11 changes: 11 additions & 0 deletions gempy/core/data/encoders/converters.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,3 +57,14 @@ def loading_model_injection(surface_points_binary: np.ndarray, orientations_bina
finally:
loading_model_context.reset(token)


@contextmanager
def loading_model_from_binary(binary_body: bytes):
    """Temporarily publish a raw serialized payload via ``loading_model_context``.

    While the ``with`` block is active, validators can read the payload under
    the ``'binary_body'`` key from ``loading_model_context``. The previous
    context value is always restored on exit, even if the body raises.

    Args:
        binary_body: The (already decompressed) binary blob to expose.
    """
    state = {'binary_body': binary_body}
    token = loading_model_context.set(state)
    try:
        yield
    finally:
        # Restore whatever the context held before we entered.
        loading_model_context.reset(token)

31 changes: 14 additions & 17 deletions gempy/core/data/geo_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -302,23 +302,20 @@ def add_surface_points(self, X: Sequence[float], Y: Sequence[float], Z: Sequence
@model_validator(mode='wrap')
@classmethod
def deserialize_properties(cls, data: Union["GeoModel", dict], constructor: ModelWrapValidatorHandler["GeoModel"]) -> "GeoModel":
try:
match data:
case GeoModel():
return data
case dict():
instance: GeoModel = constructor(data)
instantiate_if_necessary(
data=data,
key="_interpolation_options",
type=InterpolationOptions
)
instance._interpolation_options = data.get("_interpolation_options")
return instance
case _:
raise ValidationError
except ValidationError:
raise
match data:
case GeoModel():
return data
case dict():
instance: GeoModel = constructor(data)
instantiate_if_necessary(
data=data,
key="_interpolation_options",
type=InterpolationOptions
)
instance._interpolation_options = data.get("_interpolation_options")
return instance
case _:
raise ValidationError

# endregion

Expand Down
99 changes: 77 additions & 22 deletions gempy/core/data/structural_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@
import numpy as np
import warnings
from dataclasses import dataclass
from pydantic import model_validator, computed_field
from typing import Generator
from pydantic import model_validator, computed_field, ValidationError
from pydantic.functional_validators import ModelWrapValidatorHandler
from typing import Generator, Union

from gempy_engine.core.data.input_data_descriptor import InputDataDescriptor
from gempy_engine.core.data.kernel_classes.faults import FaultsData
Expand Down Expand Up @@ -33,47 +34,101 @@ class StructuralFrame:
# ? Should I create some sort of structural options class? For example, the masking descriptor and faults relations pointer
is_dirty: bool = True

@model_validator(mode="wrap")
@classmethod
def deserialize_binary(cls, data: Union["StructuralFrame", dict], constructor: ModelWrapValidatorHandler["StructuralFrame"]) -> "StructuralFrame":
match data:
case StructuralFrame():
return data
case dict():
instance: StructuralFrame = constructor(data)
metadata = data.get('binary_meta_data', {})

context = loading_model_context.get()

if 'binary_body' not in context:
return instance

binary_array = context['binary_body']

sp_binary = binary_array[:metadata["sp_binary_length"]]
ori_binary = binary_array[metadata["sp_binary_length"]:]

# Reconstruct arrays
sp_data: np.ndarray = np.frombuffer(sp_binary, dtype=SurfacePointsTable.dt)
ori_data: np.ndarray = np.frombuffer(ori_binary, dtype=OrientationsTable.dt)

instance.surface_points = SurfacePointsTable(
data=sp_data,
name_id_map=instance.surface_points_copy.name_id_map
)

instance.orientations = OrientationsTable(
data=ori_data,
name_id_map=instance.orientations_copy.name_id_map
)

return instance
case _:
raise ValidationError(f"Invalid data type for StructuralFrame: {type(data)}")

# Access the context variable to get injected data

@model_validator(mode="after")
def deserialize_surface_points(values: "StructuralFrame"):
def deserialize_surface_points(self: "StructuralFrame"):
# Access the context variable to get injected data
context = loading_model_context.get()

if 'surface_points_binary' not in context:
return values
return self

# Check if we have a binary payload to digest
binary_array = context['surface_points_binary']
if not isinstance(binary_array, np.ndarray):
return values
return self
if binary_array.shape[0] < 1:
return values
values.surface_points = SurfacePointsTable(
return self

self.surface_points = SurfacePointsTable(
data=binary_array,
name_id_map=values.surface_points_copy.name_id_map
name_id_map=self.surface_points_copy.name_id_map
)
return values

return self

@model_validator(mode="after")
def deserialize_orientations(values: "StructuralFrame"):
def deserialize_orientations(self: "StructuralFrame"):
# TODO: Check here the binary size of surface_points_binary

# Access the context variable to get injected data
context = loading_model_context.get()
if 'orientations_binary' not in context:
return values
return self

# Check if we have a binary payload to digest
binary_array = context['orientations_binary']
if not isinstance(binary_array, np.ndarray):
return values
values.orientations = OrientationsTable(
return self

self.orientations = OrientationsTable(
data=binary_array,
name_id_map=values.orientations_copy.name_id_map
name_id_map=self.orientations_copy.name_id_map
)

return values


return self

@computed_field
def binary_meta_data(self) -> dict:
    """Shapes, dtypes and byte lengths needed to split and decode the binary payload.

    The ``*_binary_length`` entries let a deserializer partition a concatenated
    surface-points + orientations byte stream back into its two tables.
    """
    surface_pts = self.surface_points_copy.data
    orientations = self.orientations_copy.data

    meta: dict = {}
    meta['sp_shape'] = surface_pts.shape
    meta['sp_dtype'] = str(surface_pts.dtype)
    meta['sp_binary_length'] = len(surface_pts.tobytes())
    meta['ori_shape'] = orientations.shape
    meta['ori_dtype'] = str(orientations.dtype)
    meta['ori_binary_length'] = len(orientations.tobytes())
    return meta

@computed_field
@property
Expand Down
12 changes: 6 additions & 6 deletions gempy/core/data/surface_points.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
from dataclasses import dataclass
import numpy as np
from dataclasses import dataclass
from pydantic import field_validator, SkipValidation
from typing import Optional, Union, Sequence, Annotated
import numpy as np
from typing import Optional, Union, Sequence

from ._data_points_helpers import generate_ids_from_names
from .encoders.converters import numpy_array_short_validator
from gempy_engine.core.data.transforms import Transform
from gempy.optional_dependencies import require_pandas

from ...optional_dependencies import require_pandas
from ._data_points_helpers import generate_ids_from_names

DEFAULT_SP_NUGGET = 0.00002

Expand Down
32 changes: 17 additions & 15 deletions gempy/modules/serialization/save_load.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import numpy as np

from ...core.data import GeoModel
from ...core.data.encoders.converters import loading_model_injection
from ...core.data.encoders.converters import loading_model_injection, loading_model_from_binary
from ...optional_dependencies import require_zlib


Expand All @@ -22,15 +22,15 @@ def save_model(model: GeoModel, path: str):
compressed_binary = zlib.compress(sp_binary + ori_binary)

# Add compression info to metadata
model_dict = model.model_dump(by_alias=True)
model_dict["_binary_metadata"] = {
"sp_shape" : model.structural_frame.surface_points_copy.data.shape,
"sp_dtype" : str(model.structural_frame.surface_points_copy.data.dtype),
"ori_shape" : model.structural_frame.orientations_copy.data.shape,
"ori_dtype" : str(model.structural_frame.orientations_copy.data.dtype),
"compression": "zlib",
"sp_length" : len(sp_binary) # Need this to split the arrays after decompression
}
# model_dict = model.model_dump(by_alias=True)
# model_dict["_binary_metadata"] = {
# "sp_shape" : model.structural_frame.surface_points_copy.data.shape,
# "sp_dtype" : str(model.structural_frame.surface_points_copy.data.dtype),
# "ori_shape" : model.structural_frame.orientations_copy.data.shape,
# "ori_dtype" : str(model.structural_frame.orientations_copy.data.dtype),
# "compression": "zlib",
# "sp_length" : len(sp_binary) # Need this to split the arrays after decompression
# }

# TODO: Putting both together
binary_file = _to_binary(model_json, compressed_binary)
Expand All @@ -48,14 +48,16 @@ def load_model(path: str) -> GeoModel:
header_json = binary_file[4:4 + header_length].decode('utf-8')
header_dict = json.loads(header_json)

metadata = header_dict.pop("_binary_metadata")
# metadata = header_dict.pop("_binary_metadata")

# Decompress the binary data
ori_data, sp_data = _foo(binary_file, header_length, metadata)
# ori_data, sp_data = _foo(binary_file, header_length, metadata)

with loading_model_injection(
surface_points_binary=sp_data,
orientations_binary=ori_data
binary_body = binary_file[4 + header_length:]
zlib = require_zlib()
decompressed_binary = zlib.decompress(binary_body)
with loading_model_from_binary(
binary_body=decompressed_binary,
):
model = GeoModel.model_validate_json(header_json)

Expand Down