Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
9dd9fbc
schema_v1-dataset_builder-add_dimension
dmitriyrepin Jun 24, 2025
f88531e
Merge remote-tracking branch 'upstream/v1' into v1
dmitriyrepin Jun 26, 2025
1358f95
First take on add_dimension(), add_coordinate(), add_variable()
dmitriyrepin Jun 27, 2025
e5261cb
Finished add_dimension, add_coordinate, add_variable
dmitriyrepin Jun 28, 2025
95c01d8
Work on build
dmitriyrepin Jun 30, 2025
46f82f0
Generalize _to_dictionary()
dmitriyrepin Jul 1, 2025
0dc7cc8
build
dmitriyrepin Jul 1, 2025
79863ac
Dataset Build - pass one
dmitriyrepin Jul 2, 2025
ec480f1
Merge the latest TGSAI/mdio-python:v1 branch
dmitriyrepin Jul 2, 2025
fa81ea2
Merge branch 'v1' into v1
tasansal Jul 7, 2025
4b2b163
Revert .container changes
dmitriyrepin Jul 7, 2025
c532c3b
PR review: remove DEVELOPER_NOTES.md
dmitriyrepin Jul 7, 2025
08798cd
PR Review: add_coordinate() should accept only data_type: ScalarType
dmitriyrepin Jul 7, 2025
e8febe4
PR review: add_variable() data_type remove default
dmitriyrepin Jul 7, 2025
0a4be3f
RE review: do not add dimension variable
dmitriyrepin Jul 8, 2025
7b25d6b
PR Review: get api version from the package version
dmitriyrepin Jul 8, 2025
7ca3ed8
PR Review: remove add_dimension_coordinate
dmitriyrepin Jul 9, 2025
4d1ec9c
PR Review: add_coordinate() remove data_type default value
dmitriyrepin Jul 9, 2025
99fcf43
PR Review: improve unit tests by extracting common functionality in v…
dmitriyrepin Jul 9, 2025
0778fdd
Remove the Dockerfile changes. They are not supposed to be a part of …
dmitriyrepin Jul 9, 2025
7e74567
PR Review: run ruff
dmitriyrepin Jul 9, 2025
0aaa5f6
PR Review: fix pre-commit errors
dmitriyrepin Jul 10, 2025
1904dee
remove some noqa overrides
tasansal Jul 10, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
PR Review: remove add_dimension_coordinate
  • Loading branch information
dmitriyrepin committed Jul 9, 2025
commit 7ca3ed8fbc1b6f47cbf10e01737d4c8a51f6f7ba
47 changes: 47 additions & 0 deletions .devcontainer/Dockerfile.cli
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# HOW TO BUILD AND RUN THIS DOCKERFILE
# * Clone mdio-python and build a Docker image:
#     git clone https://github.com/TGSAI/mdio-python.git
#     cd mdio-python
#     docker build -t mdio-cli -f .devcontainer/Dockerfile.cli .
# * Run /bin/bash in the Docker container:
#     docker run -it --rm --entrypoint /bin/bash --name mdio-cli mdio-cli
#
# USAGE:
#     docker run -it --rm --name mdio-cli mdio-cli --version
#     docker run -it --rm --name mdio-cli mdio-cli --help
#
#     LOCAL_DATA_DIR=$(pwd); \
#     docker run -it --rm -v $LOCAL_DATA_DIR:/DATA --name mdio-cli mdio-cli \
#         segy import \
#         /DATA/segy_file.segy \
#         /DATA/mdio_file.mdio \
#         -loc 181,185 \
#         -names inline,crossline
#
#     LOCAL_DATA_DIR=$(pwd); \
#     docker run -it --rm -v $LOCAL_DATA_DIR:/DATA --name mdio-cli mdio-cli \
#         segy export \
#         /DATA/mdio_file.mdio \
#         /DATA/segy_file_copy.segy
#
FROM python:3.13-bookworm

# Create the user (https://code.visualstudio.com/remote/advancedcontainers/add-nonroot-user)
ENV USERNAME=python
ENV USER_UID=1000
# USER_GID references USER_UID, so it must live in its own ENV instruction:
# substitution inside one ENV instruction uses the values from before that instruction.
ENV USER_GID=$USER_UID
RUN groupadd --gid $USER_GID $USERNAME && useradd --uid $USER_UID --gid $USER_GID -m $USERNAME

# Set the default non-root user
USER $USERNAME

# Add path to the user-installed packages
ENV PYTHONUSERBASE=/home/$USERNAME/.local
ENV PATH="$PYTHONUSERBASE/bin:$PATH"

# Copy the project sources, owned by the non-root user so pip can build in place
COPY --chown=$USERNAME:$USERNAME ./ /home/$USERNAME/mdio-python

WORKDIR /home/$USERNAME/mdio-python
# --no-cache-dir keeps pip's download/wheel cache out of the image layer
RUN pip install --no-cache-dir .

# The image behaves like the `mdio` executable; CMD supplies the default arguments
ENTRYPOINT ["mdio"]
CMD ["--version"]
61 changes: 61 additions & 0 deletions .devcontainer/Dockerfile.dev
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
# USAGE:
# This file will be used by the VS Code DevContainer extension
# to create a development environment for the mdio-python project.
# HOW TO RUN TESTS
# 1. Open the project in VS Code.
# 2. Open the Command Palette (Ctrl+Shift+P) and select "Dev Containers: Reopen in Container".
# 3. Once the container is running, open a terminal in VS Code.
# 4. Run the tests using the command: `nox -s test`.
# HOW TO MANUALLY BUILD AND RUN THE CONTAINER
#     docker build -t mdio-dev -f .devcontainer/Dockerfile.dev .
#     docker run -it --rm --entrypoint /bin/bash --name mdio-dev mdio-dev
# NOTES:
# 1. The container will be run as the non-root user 'vscode' with UID 1000.
# 2. The virtual environment will be set up at /home/vscode/venv
# 3. The project source code will be mounted at /workspaces/mdio-python
ARG PYTHON_VERSION="3.13"
ARG LINUX_DISTRO="bookworm"
ARG UV_VERSION="0.6.11"
ARG NOX_VERSION="2025.2.9"
FROM mcr.microsoft.com/devcontainers/python:1-${PYTHON_VERSION}-${LINUX_DISTRO}

# Install git for nox pre-commit
# (update + install + list cleanup stay in one layer so the apt index is never stale or shipped)
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        git \
    && rm -rf /var/lib/apt/lists/*

ENV USERNAME="vscode"
USER $USERNAME

# # Add path to the user-installed packages
# ENV PYTHONUSERBASE=/home/$USERNAME/.local
# ENV PATH="$PYTHONUSERBASE/bin:$PATH"

COPY --chown=$USERNAME:$USERNAME ./ /workspaces/mdio-python

WORKDIR /workspaces/mdio-python

# ARGs declared before FROM are only visible to FROM; redeclare them to use them in this stage
ARG UV_VERSION
ARG NOX_VERSION
# --no-cache-dir keeps pip's download/wheel cache out of the image layer
RUN python3 -m pip install --no-cache-dir uv==${UV_VERSION} nox==${NOX_VERSION} msgpack ipykernel

# Initialize virtual environment in the container
ENV VIRTUAL_ENV="/home/$USERNAME/venv"
RUN python3 -m venv $VIRTUAL_ENV
# Putting the venv's bin directory first on PATH makes `pip`/`python` below resolve to the venv
ENV PATH="$VIRTUAL_ENV/bin:$PATH"

# installing pytest is required for VS Code Python Testing
RUN pip install --no-cache-dir pytest pytest-cov pytest-mock pytest-asyncio

# Install the project in editable mode
# This allows for live reloading of the code during development
RUN pip install --no-cache-dir -e .

# RUN uv pip install snakeviz






49 changes: 49 additions & 0 deletions .devcontainer/Dockerfile.nox
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# HOW TO BUILD AND RUN THIS DOCKERFILE
# 1. Make sure you have Docker installed and running.
# 2. Clone mdio-python and build the Docker image:
#     git clone https://github.com/TGSAI/mdio-python.git
#     cd mdio-python
#     docker build -t mdio-nox -f .devcontainer/Dockerfile.nox .
# 3. Run /bin/bash in the Docker container:
#     LOCAL_DATA_DIR=$(pwd); \
#     docker run -it --rm -v $LOCAL_DATA_DIR:/DATA --entrypoint /bin/bash --name mdio-nox mdio-nox
#
# USAGE:
#     docker run -it --rm mdio-nox --list
#     docker run -it --rm mdio-nox -s tests-3.13
#     docker run -it --rm mdio-nox --no-stop-on-first-error
#
# NOTE: nox will fail if run in the directory mounted from the host machine
ARG PYTHON_VERSION="3.13"
ARG LINUX_DISTRO="bookworm"
ARG UV_VERSION="0.6.11"
ARG NOX_VERSION="2025.2.9"
FROM python:${PYTHON_VERSION}-${LINUX_DISTRO}
# ARGs declared before FROM are only visible to FROM; redeclare them to use them in this stage
ARG PYTHON_VERSION
ARG LINUX_DISTRO
RUN echo "Using python:${PYTHON_VERSION}-${LINUX_DISTRO}"

# Create the user (https://code.visualstudio.com/remote/advancedcontainers/add-nonroot-user)
ENV USERNAME=python
ENV USER_UID=1000
# USER_GID references USER_UID, so it must live in its own ENV instruction:
# substitution inside one ENV instruction uses the values from before that instruction.
ENV USER_GID=$USER_UID
RUN groupadd --gid $USER_GID $USERNAME && useradd --uid $USER_UID --gid $USER_GID -m $USERNAME

# Set the default non-root user
USER $USERNAME

# Add path to the user-installed packages
ENV PYTHONUSERBASE=/home/$USERNAME/.local
ENV PATH="$PYTHONUSERBASE/bin:$PATH"

COPY --chown=$USERNAME:$USERNAME ./ /home/$USERNAME/mdio-python

WORKDIR /home/$USERNAME/mdio-python
# --no-cache-dir keeps pip's download/wheel cache out of the image layer
RUN pip install --no-cache-dir .

# Install UV dependency manager and Nox test automator
ARG UV_VERSION
ARG NOX_VERSION
RUN echo "Using uv: $UV_VERSION and nox: $NOX_VERSION"
RUN python3 -m pip install --no-cache-dir uv==${UV_VERSION} nox==${NOX_VERSION} msgpack ipykernel

# The image behaves like the `nox` executable; CMD supplies the default arguments
ENTRYPOINT ["nox"]
CMD ["--list"]
20 changes: 11 additions & 9 deletions .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@
// README at: https://github.com/devcontainers/templates/tree/main/src/python
{
"build": {
"dockerfile": "Dockerfile",
"dockerfile": "Dockerfile.dev",
"context": ".."
},
// Use 'postCreateCommand' to run commands after the container is created.
"postCreateCommand": {
"post_create_script": "bash ./.devcontainer/post-install.sh"
// "post_create_script": "bash ./.devcontainer/post-install.sh"
},
// Forward 8787 to enable us to view dask dashboard
"forwardPorts": [8787],
Expand All @@ -16,8 +16,9 @@
// Configure properties specific to VS Code.
"vscode": {
"settings": {
"python.terminal.activateEnvInCurrentTerminal": true,
"python.defaultInterpreterPath": "/opt/venv/bin/python"
"python.testing.pytestArgs": ["tests"],
"python.testing.unittestEnabled": false,
"python.testing.pytestEnabled": true
},
"extensions": [
"ms-python.python",
Expand All @@ -27,17 +28,18 @@
"ms-toolsai.jupyter-renderers",
"vscode-icons-team.vscode-icons",
"wayou.vscode-todo-highlight",
"streetsidesoftware.code-spell-checker"
"streetsidesoftware.code-spell-checker",
"eamodio.gitlens",
"visualstudioexptteam.vscodeintellicode"
]
}
},
// Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root.
// "remoteUser": "root",
"updateRemoteUserUID": true,
"workspaceMount": "source=${localWorkspaceFolder},target=/workspaces/mdio-python,type=bind",
"workspaceFolder": "/workspaces/mdio-python",
"mounts": [
// Re-use local Git configuration
"source=${localEnv:HOME}/.gitconfig,target=/home/vscode/.gitconfig_tmp,type=bind,consistency=cached",
"source=${localEnv:HOME}/.gitconfig,target=/root/.gitconfig_tmp,type=bind,consistency=cached",
"source=${localEnv:SCRATCH_DIR}/${localEnv:USER},target=/scratch/,type=bind,consistency=cached"
// "source=${localWorkspaceFolder}/../DATA/,target=/DATA/,type=bind,consistency=cached"
]
}
24 changes: 0 additions & 24 deletions src/mdio/schemas/v1/dataset_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,30 +245,6 @@ def add_coordinate( # noqa: PLR0913
self._state = _BuilderState.HAS_COORDINATES
return self

def add_dimension_coordinate(
    self,
    dimension_name: str,
    *,
    data_type: ScalarType,
    compressor: Blosc | ZFP | None = None,
    metadata_info: VariableMetadataList | None = None,
) -> "MDIODatasetBuilder":
    """Register a coordinate named after an existing dimension.

    Convenience wrapper around ``add_coordinate()``: the coordinate is given
    the dimension's name (also used as its long name) and spans only that
    single dimension.

    Args:
        dimension_name: Name of the dimension; reused as the coordinate name.
        data_type: Scalar data type forwarded to ``add_coordinate()``.
        compressor: Optional compressor forwarded to ``add_coordinate()``.
        metadata_info: Optional metadata; converted with ``_to_dictionary()``
            before being forwarded.

    Returns:
        This builder, so that calls can be chained.
    """
    coordinate_options = {
        "long_name": dimension_name,
        "dimensions": [dimension_name],
        "data_type": data_type,
        "compressor": compressor,
        "metadata_info": _to_dictionary(metadata_info),
    }
    self.add_coordinate(dimension_name, **coordinate_options)
    return self

def add_variable( # noqa: PLR0913
self,
name: str,
Expand Down
50 changes: 12 additions & 38 deletions tests/unit/v1/test_dataset_builder_add_variable.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,8 +97,8 @@ def test_add_variable_with_coords() -> None:
builder.add_dimension("depth", 300)

# Add dimension coordinates before we can add a data variable
builder.add_dimension_coordinate("inline", data_type=ScalarType.INT32)
builder.add_dimension_coordinate("crossline", data_type=ScalarType.INT32)
builder.add_coordinate("inline", dimensions=["inline"], data_type=ScalarType.UINT32)
builder.add_coordinate("crossline", dimensions=["crossline"], data_type=ScalarType.UINT32)

# Validate: adding a variable with a coordinate that has not been pre-created is not allowed
msg = "Pre-existing coordinate named 'depth' is not found"
Expand Down Expand Up @@ -168,32 +168,6 @@ def test_add_variable_with_coords() -> None:
assert builder._get_coordinate(var_ampl2.coordinates, "cdp-y") is not None


def test_add_dimension_coordinate() -> None:
    """Check the coordinate and the variable produced by add_dimension_coordinate()."""
    builder = MDIODatasetBuilder("test_dataset")
    builder.add_dimension("inline", 100)

    builder.add_dimension_coordinate("inline", data_type=ScalarType.INT32)

    # The coordinate must be registered in the builder-wide coordinate list ...
    inline_coord = builder._get_coordinate(builder._coordinates, "inline")
    # ... and must carry its dimension as a named dimension of size 100
    assert _get_named_dimension(inline_coord.dimensions, "inline", 100) is not None

    # A variable with the same name must have been created alongside it
    matching_vars = [v for v in builder._variables if v.name == "inline"]
    inline_var = matching_vars[0] if matching_vars else None
    assert inline_var is not None

    # Properties of the generated variable
    assert inline_var.name == "inline"
    assert inline_var.long_name == "'inline' coordinate variable"
    assert len(inline_var.dimensions) == 1
    assert _get_named_dimension(inline_var.dimensions, "inline", 100) is not None
    assert inline_var.data_type == ScalarType.INT32
    assert inline_var.compressor is None  # Default value
    assert len(inline_var.coordinates) == 1
    assert builder._get_coordinate(inline_var.coordinates, "inline") is not None
    assert inline_var.metadata is None  # Default value


def test_add_variable_with_defaults() -> None:
"""Test adding variable with default arguments."""
builder = MDIODatasetBuilder("test_dataset")
Expand All @@ -202,12 +176,12 @@ def test_add_variable_with_defaults() -> None:
builder.add_dimension("crossline", 200)
builder.add_dimension("depth", 300)
# Add dimension coordinates
builder.add_dimension_coordinate("inline", data_type=ScalarType.INT32)
builder.add_dimension_coordinate("crossline", data_type=ScalarType.INT32)
builder.add_dimension_coordinate("depth",
data_type=ScalarType.FLOAT32,
metadata_info=[
AllUnits(units_v1=LengthUnitModel(length=LengthUnitEnum.METER))])
builder.add_coordinate("inline", dimensions=["inline"], data_type=ScalarType.UINT32)
builder.add_coordinate("crossline", dimensions=["crossline"], data_type=ScalarType.UINT32)
builder.add_coordinate("depth", dimensions=["depth"], data_type=ScalarType.UINT32,
metadata_info=[
AllUnits(units_v1=LengthUnitModel(length=LengthUnitEnum.METER))
])

# Add data variable using defaults
builder.add_variable("ampl",
Expand Down Expand Up @@ -244,10 +218,10 @@ def test_add_variable_full_parameters() -> None:
builder.add_dimension("depth", 300)

# Add dimension coordinates
builder.add_dimension_coordinate("inline", data_type=ScalarType.INT32)
builder.add_dimension_coordinate("crossline", data_type=ScalarType.INT32)
builder.add_dimension_coordinate("depth", data_type=ScalarType.INT32)
builder.add_coordinate("inline", dimensions=["inline"], data_type=ScalarType.UINT32)
builder.add_coordinate("crossline", dimensions=["crossline"], data_type=ScalarType.UINT32)
builder.add_coordinate("depth", dimensions=["depth"], data_type=ScalarType.UINT32)

# Add coordinates before we can add a data variable
builder.add_coordinate("cdp-x", dimensions=["inline", "crossline"], data_type=ScalarType.FLOAT64)
builder.add_coordinate("cdp-y", dimensions=["inline", "crossline"], data_type=ScalarType.FLOAT64)
Expand Down
16 changes: 7 additions & 9 deletions tests/unit/v1/test_dataset_builder_build.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,8 @@ def test_build() -> None:
MDIODatasetBuilder("test_dataset")
.add_dimension("inline", 100)
.add_dimension("crossline", 200)
# Add a dimension coordinate explicitly using add_coordinate()
.add_coordinate("inline", dimensions=["inline"], data_type=ScalarType.FLOAT64)
# Add a dimension coordinate using .add_dimension_coordinate() shortcut
.add_dimension_coordinate("crossline", data_type=ScalarType.FLOAT64)
.add_coordinate("crossline", dimensions=["crossline"], data_type=ScalarType.FLOAT64)
.add_coordinate("x_coord", dimensions=["inline", "crossline"])
.add_coordinate("y_coord", dimensions=["inline", "crossline"])
.add_variable("data",
Expand Down Expand Up @@ -176,12 +174,12 @@ def make_campos_3d_dataset() -> Dataset:
ds.add_dimension("inline", 256)
ds.add_dimension("crossline", 512)
ds.add_dimension("depth", 384)
ds.add_dimension_coordinate("inline", data_type=ScalarType.UINT32)
ds.add_dimension_coordinate("crossline", data_type=ScalarType.UINT32)
ds.add_dimension_coordinate("depth", data_type=ScalarType.FLOAT64,
metadata_info=[
AllUnits(units_v1=LengthUnitModel(length=LengthUnitEnum.METER))
])
ds.add_coordinate("inline", dimensions=["inline"], data_type=ScalarType.UINT32)
ds.add_coordinate("crossline", dimensions=["crossline"], data_type=ScalarType.UINT32)
ds.add_coordinate("depth", dimensions=["depth"], data_type=ScalarType.FLOAT64,
metadata_info=[
AllUnits(units_v1=LengthUnitModel(length=LengthUnitEnum.METER))
])
# Add coordinates
ds.add_coordinate(
"cdp-x",
Expand Down