diff --git a/.github/dependabot.yml b/.github/dependabot.yml deleted file mode 100644 index 9409d741..00000000 --- a/.github/dependabot.yml +++ /dev/null @@ -1,55 +0,0 @@ -version: 2 -updates: -- package-ecosystem: "github-actions" - directory: "/" - target-branch: main - schedule: - interval: "monthly" - time: "07:00" - timezone: "Etc/GMT-4" # EDT/EST - pull-request-branch-name: - separator: "-" - open-pull-requests-limit: 2 - reviewers: - - janbridley - groups: - actions-version: - applies-to: version-updates - patterns: - - '*' - actions-security: - applies-to: security-updates - patterns: - - '*' -- package-ecosystem: "pip" - directory: "/" - target-branch: main - schedule: - interval: "monthly" - time: "07:00" - timezone: "Etc/GMT-4" # EDT/EST - pull-request-branch-name: - separator: "-" - open-pull-requests-limit: 2 - reviewers: - - janbridley - ignore: - - dependency-name: "pytest" - versions: [ "<=7.0.1" ] - - dependency-name: "gemmi" - versions: [ "<=0.6.3" ] - groups: - pip-version: - applies-to: version-updates - patterns: - - '*' - update-types: - - minor - - patch - pip-security: - applies-to: security-updates - patterns: - - '*' - update-types: - - minor - - patch diff --git a/.github/requirements-3.10.txt b/.github/requirements-3.10.txt new file mode 100644 index 00000000..16ee2ee5 --- /dev/null +++ b/.github/requirements-3.10.txt @@ -0,0 +1,47 @@ +# This file was autogenerated by uv via the following command: +# uv pip compile --python-version=3.10 pyproject.toml tests/requirements.in +ase==3.23.0 + # via -r tests/requirements.in +contourpy==1.3.1 + # via matplotlib +cycler==0.12.1 + # via matplotlib +exceptiongroup==1.2.2 + # via pytest +fonttools==4.55.3 + # via matplotlib +gemmi==0.7.0 + # via -r tests/requirements.in +iniconfig==2.0.0 + # via pytest +kiwisolver==1.4.7 + # via matplotlib +matplotlib==3.10.0 + # via ase +numpy==2.2.0 + # via + # parsnip (pyproject.toml) + # ase + # contourpy + # matplotlib + # scipy +packaging==24.2 + # via + # 
matplotlib + # pytest +pillow==11.0.0 + # via matplotlib +pluggy==1.5.0 + # via pytest +pyparsing==3.2.0 + # via matplotlib +pytest==8.3.4 + # via -r tests/requirements.in +python-dateutil==2.9.0.post0 + # via matplotlib +scipy==1.14.1 + # via ase +six==1.17.0 + # via python-dateutil +tomli==2.2.1 + # via pytest diff --git a/.github/requirements-3.11.txt b/.github/requirements-3.11.txt new file mode 100644 index 00000000..1d780439 --- /dev/null +++ b/.github/requirements-3.11.txt @@ -0,0 +1,43 @@ +# This file was autogenerated by uv via the following command: +# uv pip compile --python-version=3.11 pyproject.toml tests/requirements.in +ase==3.23.0 + # via -r tests/requirements.in +contourpy==1.3.1 + # via matplotlib +cycler==0.12.1 + # via matplotlib +fonttools==4.55.3 + # via matplotlib +gemmi==0.7.0 + # via -r tests/requirements.in +iniconfig==2.0.0 + # via pytest +kiwisolver==1.4.7 + # via matplotlib +matplotlib==3.10.0 + # via ase +numpy==2.2.0 + # via + # parsnip (pyproject.toml) + # ase + # contourpy + # matplotlib + # scipy +packaging==24.2 + # via + # matplotlib + # pytest +pillow==11.0.0 + # via matplotlib +pluggy==1.5.0 + # via pytest +pyparsing==3.2.0 + # via matplotlib +pytest==8.3.4 + # via -r tests/requirements.in +python-dateutil==2.9.0.post0 + # via matplotlib +scipy==1.14.1 + # via ase +six==1.17.0 + # via python-dateutil diff --git a/.github/requirements-3.12.txt b/.github/requirements-3.12.txt new file mode 100644 index 00000000..fdf0bdbd --- /dev/null +++ b/.github/requirements-3.12.txt @@ -0,0 +1,43 @@ +# This file was autogenerated by uv via the following command: +# uv pip compile --python-version=3.12 pyproject.toml tests/requirements.in +ase==3.23.0 + # via -r tests/requirements.in +contourpy==1.3.1 + # via matplotlib +cycler==0.12.1 + # via matplotlib +fonttools==4.55.3 + # via matplotlib +gemmi==0.7.0 + # via -r tests/requirements.in +iniconfig==2.0.0 + # via pytest +kiwisolver==1.4.7 + # via matplotlib +matplotlib==3.10.0 + # via ase 
+numpy==2.2.0 + # via + # parsnip (pyproject.toml) + # ase + # contourpy + # matplotlib + # scipy +packaging==24.2 + # via + # matplotlib + # pytest +pillow==11.0.0 + # via matplotlib +pluggy==1.5.0 + # via pytest +pyparsing==3.2.0 + # via matplotlib +pytest==8.3.4 + # via -r tests/requirements.in +python-dateutil==2.9.0.post0 + # via matplotlib +scipy==1.14.1 + # via ase +six==1.17.0 + # via python-dateutil diff --git a/.github/requirements-3.13.txt b/.github/requirements-3.13.txt new file mode 100644 index 00000000..fa40d9cf --- /dev/null +++ b/.github/requirements-3.13.txt @@ -0,0 +1,43 @@ +# This file was autogenerated by uv via the following command: +# uv pip compile --python-version=3.13 pyproject.toml tests/requirements.in +ase==3.23.0 + # via -r tests/requirements.in +contourpy==1.3.1 + # via matplotlib +cycler==0.12.1 + # via matplotlib +fonttools==4.55.3 + # via matplotlib +gemmi==0.7.0 + # via -r tests/requirements.in +iniconfig==2.0.0 + # via pytest +kiwisolver==1.4.7 + # via matplotlib +matplotlib==3.10.0 + # via ase +numpy==2.2.0 + # via + # parsnip (pyproject.toml) + # ase + # contourpy + # matplotlib + # scipy +packaging==24.2 + # via + # matplotlib + # pytest +pillow==11.0.0 + # via matplotlib +pluggy==1.5.0 + # via pytest +pyparsing==3.2.0 + # via matplotlib +pytest==8.3.4 + # via -r tests/requirements.in +python-dateutil==2.9.0.post0 + # via matplotlib +scipy==1.14.1 + # via ase +six==1.17.0 + # via python-dateutil diff --git a/.github/requirements-3.6.txt b/.github/requirements-3.6.txt new file mode 100644 index 00000000..10287047 --- /dev/null +++ b/.github/requirements-3.6.txt @@ -0,0 +1,56 @@ + # manual, required to use __future__ annotations style in py3.6 +ase==3.22.1 + # via -r tests/requirements.in +cycler==0.11.0 + # via matplotlib +exceptiongroup==0.0.0a0 + # via pytest +fonttools==4.27.1 +# This file was MANUALLY MODIFIED based on an initial state generated via the following command: +# uv pip compile --python-version=3.7 
pyproject.toml tests/requirements.in +future-annotations==1.0.0 + # via matplotlib +gemmi==0.6.3 + # via -r tests/requirements.in +importlib-metadata==4.8.3 + # via + # pluggy + # pytest +iniconfig==1.1.1 + # via pytest +kiwisolver==1.3.1 + # via matplotlib +matplotlib==3.3.4 + # via ase +numpy==1.19.5 + # via + # parsnip (pyproject.toml) + # ase + # matplotlib + # scipy +packaging==21.3 + # via + # matplotlib + # pytest +pillow==8.4.0 + # via matplotlib +pluggy==1.0.0 + # via pytest +pyparsing==3.1.4 + # via matplotlib +pytest==7.0.1 + # via -r tests/requirements.in +python-dateutil==2.9.0.post0 + # via matplotlib +scipy==1.5.4 + # via ase +six==1.17.0 + # via python-dateutil +tomli==1.2.3 + # via pytest +typing-extensions==4.1.1 + # via + # importlib-metadata + # kiwisolver +zipp==3.6.0 + # via importlib-metadata diff --git a/.github/requirements-3.7.txt b/.github/requirements-3.7.txt new file mode 100644 index 00000000..3b1a5a49 --- /dev/null +++ b/.github/requirements-3.7.txt @@ -0,0 +1,54 @@ +# This file was autogenerated by uv via the following command: +# uv pip compile --python-version=3.7 pyproject.toml tests/requirements.in +ase==3.22.1 + # via -r tests/requirements.in +cycler==0.11.0 + # via matplotlib +exceptiongroup==1.2.2 + # via pytest +fonttools==4.38.0 + # via matplotlib +gemmi==0.6.7 + # via -r tests/requirements.in +importlib-metadata==6.7.0 + # via + # pluggy + # pytest +iniconfig==2.0.0 + # via pytest +kiwisolver==1.4.5 + # via matplotlib +matplotlib==3.5.3 + # via ase +numpy==1.21.6 + # via + # parsnip (pyproject.toml) + # ase + # matplotlib + # scipy +packaging==24.0 + # via + # matplotlib + # pytest +pillow==9.5.0 + # via matplotlib +pluggy==1.2.0 + # via pytest +pyparsing==3.1.4 + # via matplotlib +pytest==7.4.4 + # via -r tests/requirements.in +python-dateutil==2.9.0.post0 + # via matplotlib +scipy==1.7.3 + # via ase +six==1.17.0 + # via python-dateutil +tomli==2.0.1 + # via pytest +typing-extensions==4.7.1 + # via + # importlib-metadata + 
# kiwisolver +zipp==3.15.0 + # via importlib-metadata diff --git a/.github/requirements-3.8.txt b/.github/requirements-3.8.txt new file mode 100644 index 00000000..7cbddb06 --- /dev/null +++ b/.github/requirements-3.8.txt @@ -0,0 +1,51 @@ +# This file was autogenerated by uv via the following command: +# uv pip compile --python-version=3.8 pyproject.toml tests/requirements.in +ase==3.23.0 + # via -r tests/requirements.in +contourpy==1.1.1 + # via matplotlib +cycler==0.12.1 + # via matplotlib +exceptiongroup==1.2.2 + # via pytest +fonttools==4.55.3 + # via matplotlib +gemmi==0.7.0 + # via -r tests/requirements.in +importlib-resources==6.4.5 + # via matplotlib +iniconfig==2.0.0 + # via pytest +kiwisolver==1.4.7 + # via matplotlib +matplotlib==3.7.5 + # via ase +numpy==1.24.4 + # via + # parsnip (pyproject.toml) + # ase + # contourpy + # matplotlib + # scipy +packaging==24.2 + # via + # matplotlib + # pytest +pillow==10.4.0 + # via matplotlib +pluggy==1.5.0 + # via pytest +pyparsing==3.1.4 + # via matplotlib +pytest==8.3.4 + # via -r tests/requirements.in +python-dateutil==2.9.0.post0 + # via matplotlib +scipy==1.10.1 + # via ase +six==1.17.0 + # via python-dateutil +tomli==2.2.1 + # via pytest +zipp==3.20.2 + # via importlib-resources diff --git a/.github/requirements-3.9.txt b/.github/requirements-3.9.txt new file mode 100644 index 00000000..d0b897b7 --- /dev/null +++ b/.github/requirements-3.9.txt @@ -0,0 +1,51 @@ +# This file was autogenerated by uv via the following command: +# uv pip compile --python-version=3.9 pyproject.toml tests/requirements.in +ase==3.23.0 + # via -r tests/requirements.in +contourpy==1.3.0 + # via matplotlib +cycler==0.12.1 + # via matplotlib +exceptiongroup==1.2.2 + # via pytest +fonttools==4.55.3 + # via matplotlib +gemmi==0.7.0 + # via -r tests/requirements.in +importlib-resources==6.4.5 + # via matplotlib +iniconfig==2.0.0 + # via pytest +kiwisolver==1.4.7 + # via matplotlib +matplotlib==3.9.4 + # via ase +numpy==2.0.2 + # via + # 
parsnip (pyproject.toml) + # ase + # contourpy + # matplotlib + # scipy +packaging==24.2 + # via + # matplotlib + # pytest +pillow==11.0.0 + # via matplotlib +pluggy==1.5.0 + # via pytest +pyparsing==3.2.0 + # via matplotlib +pytest==8.3.4 + # via -r tests/requirements.in +python-dateutil==2.9.0.post0 + # via matplotlib +scipy==1.13.1 + # via ase +six==1.17.0 + # via python-dateutil +tomli==2.2.1 + # via pytest +zipp==3.21.0 + # via importlib-resources diff --git a/.github/workflows/CI.yaml b/.github/workflows/CI.yaml index 3f390ee6..44b0824e 100644 --- a/.github/workflows/CI.yaml +++ b/.github/workflows/CI.yaml @@ -8,29 +8,29 @@ on: - "breaking" workflow_dispatch: - jobs: run-tests-modern-python: strategy: fail-fast: true matrix: - python-version: ["3.9", "3.10", "3.11", "3.12"] + python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"] runs-on: ["ubuntu-latest"] # Pull in the test script from run_tests and distribute python from matrix versions uses: ./.github/workflows/run_tests.yaml with: python-version: ${{ matrix.python-version }} runs-on: ${{ matrix.runs-on }} + requirements-file: ".github/requirements-${{ matrix.python-version }}.txt" run-tests-legacy-python: needs: run-tests-modern-python # Wait until tests pass on python 3.9+ strategy: - fail-fast: true - matrix: - python-version: ["3.6", "3.7", "3.8"] + fail-fast: false # Legacy versions are much less stable - run tests independently + matrix: # Code works on py3.6, but type annotations are broken + python-version: ["3.7", "3.8"] runs-on: ["ubuntu-20.04"] uses: ./.github/workflows/run_tests.yaml with: python-version: ${{ matrix.python-version }} runs-on: ${{ matrix.runs-on }} - requirements-file: "tests/requirements-legacy.txt" + requirements-file: ".github/requirements-${{ matrix.python-version }}.txt" diff --git a/.github/workflows/requirements.yaml b/.github/workflows/requirements.yaml new file mode 100644 index 00000000..6274176b --- /dev/null +++ b/.github/workflows/requirements.yaml @@ -0,0 +1,23 @@ 
+name: Generate lockfiles + +on: + schedule: [ { cron: "0 0 1 * *" } ] + +jobs: + install-python-and-run: + runs-on: "ubuntu-20.04" + + strategy: + # Python 3.6 file cannot be generated with `uv`, and is manually resolved + matrix: { python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13"] } + + steps: + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + + - name: Run generate-files.sh + run: | + uv pip compile --python-version=${{ matrix.python-version}} \ + pyproject.toml tests/requirements.in > .github/requirements-${{ matrix.python-version }}.txt diff --git a/.github/workflows/run_tests.yaml b/.github/workflows/run_tests.yaml index e3e125a7..5db51c45 100644 --- a/.github/workflows/run_tests.yaml +++ b/.github/workflows/run_tests.yaml @@ -10,27 +10,27 @@ on: required: true type: string requirements-file: - default: "tests/requirements.txt" + required: true type: string jobs: run_test: runs-on: ${{ inputs.runs-on }} steps: - # Steps common to both groups - uses: actions/checkout@v4 - - name: Set up Python ${{ inputs.python-version }} - uses: actions/setup-python@v5 + - name: Install uv + uses: astral-sh/setup-uv@v4 with: python-version: ${{ inputs.python-version }} - name: Install dependencies + version: "0.5.11" + - name: Set up environment with dependencies run: | - python -m pip install -r requirements.txt - python -m pip install -r ${{ inputs.requirements-file }} + uv venv && . .venv/bin/activate + echo PATH=$PATH >> $GITHUB_ENV - name: Install package run: | python --version - python -m pip install . -v --progress-bar off + uv pip install . -r ${{ inputs.requirements-file }} -v python -c "import parsnip; print('parsnip', parsnip.__version__)" - name: Test with pytest run: | diff --git a/README.rst b/README.rst index 18db44a2..e4628290 100644 --- a/README.rst +++ b/README.rst @@ -2,11 +2,7 @@ ..
image:: doc/source/_static/parsnip_header_dark.svg :width: 600 - :class: only-light -.. image:: doc/source/_static/parsnip_header_light.svg - :width: 600 - :class: only-dark .. _header: @@ -32,7 +28,7 @@ .. _parse: The ``parsnip.parse`` module handles standard CIF files (including those under the `CIF 1.1 `_ and `CIF 2.0 `_ standards), as well as many features from the `mmCIF `_ format. -The package includes a table reader for `loop\_`-delimited tables as well as a key-value pair reader. Provide a filename and a list of keys to either of these functions and you're all set to read start parsing CIF and mmCIF files! +The package includes a table reader for `loop\_`-delimited tables as well as a key-value pair reader. Provide a filename and a list of keys to either of these functions and you're all set to start parsing CIF files! .. _installing: diff --git a/changelog.rst b/changelog.rst new file mode 100644 index 00000000..7e429929 --- /dev/null +++ b/changelog.rst @@ -0,0 +1,10 @@ +Changelog +========= + +The format is based on `Keep a Changelog <https://keepachangelog.com/en/1.0.0/>`__. +This project adheres to `Semantic Versioning <https://semver.org/spec/v2.0.0.html>`__.
+ +v0.x.x - 20xx-xx-xx + +Added +~~~~~ diff --git a/credits.rst b/credits.rst new file mode 100644 index 00000000..c1f8f372 --- /dev/null +++ b/credits.rst @@ -0,0 +1,6 @@ +Credits +======= + +The following people have contributed to **parsnip**: + +* Jen Bradley, University of Michigan - *Creator and lead developer* diff --git a/doc/requirements.in b/doc/requirements.in index 64808244..d2586601 100644 --- a/doc/requirements.in +++ b/doc/requirements.in @@ -1,4 +1,4 @@ -autodocsumm==0.2.12 -furo==2024.5.6 +autodocsumm +furo numpy>=1.26.4 -sphinx==7.3.7 +sphinx>=7.3.7 diff --git a/doc/requirements.txt b/doc/requirements.txt index 3a2e2322..afdf45c7 100644 --- a/doc/requirements.txt +++ b/doc/requirements.txt @@ -1,48 +1,46 @@ -# -# This file is autogenerated by pip-compile with Python 3.12 -# by the following command: -# -# pip-compile --strip-extras requirements.in -# -alabaster==0.7.16 +# This file was autogenerated by uv via the following command: +# uv pip compile --python-version=3.12 pyproject.toml doc/requirements.in +alabaster==1.0.0 # via sphinx -autodocsumm==0.2.12 +autodocsumm==0.2.14 # via -r doc/requirements.in -babel==2.15.0 +babel==2.16.0 # via sphinx beautifulsoup4==4.12.3 # via furo -certifi==2024.2.2 +certifi==2024.12.14 # via requests -charset-normalizer==3.3.2 +charset-normalizer==3.4.0 # via requests docutils==0.21.2 # via sphinx -furo==2024.5.6 +furo==2024.8.6 # via -r doc/requirements.in -idna==3.7 +idna==3.10 # via requests imagesize==1.4.1 # via sphinx jinja2==3.1.4 # via sphinx -markupsafe==2.1.5 +markupsafe==3.0.2 # via jinja2 -numpy==2.0.0 - # via -r doc/requirements.in -packaging==24.0 +numpy==2.2.0 + # via + # -r doc/requirements.in + # parsnip (pyproject.toml) +packaging==24.2 # via sphinx pygments==2.18.0 # via # furo # sphinx -requests==2.31.0 +requests==2.32.3 # via sphinx snowballstemmer==2.2.0 # via sphinx -soupsieve==2.5 +soupsieve==2.6 # via beautifulsoup4 -sphinx==7.3.7 +sphinx==8.1.3 # via # -r doc/requirements.in # 
autodocsumm @@ -50,17 +48,17 @@ sphinx==7.3.7 # sphinx-basic-ng sphinx-basic-ng==1.0.0b2 # via furo -sphinxcontrib-applehelp==1.0.8 +sphinxcontrib-applehelp==2.0.0 # via sphinx -sphinxcontrib-devhelp==1.0.6 +sphinxcontrib-devhelp==2.0.0 # via sphinx -sphinxcontrib-htmlhelp==2.0.5 +sphinxcontrib-htmlhelp==2.1.0 # via sphinx sphinxcontrib-jsmath==1.0.1 # via sphinx -sphinxcontrib-qthelp==1.0.7 +sphinxcontrib-qthelp==2.0.0 # via sphinx -sphinxcontrib-serializinghtml==1.1.10 +sphinxcontrib-serializinghtml==2.0.0 # via sphinx -urllib3==2.2.1 +urllib3==2.2.3 # via requests diff --git a/doc/source/conf.py b/doc/source/conf.py index cc7bc6c7..01f201f7 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -44,7 +44,7 @@ # -- Options for HTML output ------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output - +source_repository = "https://github.com/glotzerlab/parsnip/" html_theme = "furo" html_static_path = ["_static"] html_theme_options = { @@ -59,7 +59,8 @@ "color-brand-primary": "#005A50", "color-brand-content": "#406a8c", }, - "top_of_page_button": "edit", - "source_edit_link": "https://github.com/glotzerlab/parsnip", + "source_edit_link": "https://github.com/glotzerlab/parsnip/edit/main/doc/source/{filename}", + "source_view_link": "https://github.com/glotzerlab/parsnip", } + html_favicon = "_static/parsnip_logo_favicon.svg" diff --git a/doc/source/index.rst b/doc/source/index.rst index c68adbd4..4fb7eed9 100644 --- a/doc/source/index.rst +++ b/doc/source/index.rst @@ -24,6 +24,7 @@ package-parse package-patterns + package-unitcells .. toctree:: diff --git a/doc/source/package-unitcells.rst b/doc/source/package-unitcells.rst new file mode 100644 index 00000000..5d31584d --- /dev/null +++ b/doc/source/package-unitcells.rst @@ -0,0 +1,7 @@ +Unitcells Module +============================== + +.. rubric:: Overview + +.. 
automodule:: parsnip.unitcells + :members: diff --git a/parsnip/__init__.py b/parsnip/__init__.py index 82ed99be..aef22773 100644 --- a/parsnip/__init__.py +++ b/parsnip/__init__.py @@ -1,4 +1,14 @@ -"""TODO: Add docstring.""" -from . import parse, patterns +"""``parsnip``: a package for the simple reading and processing of .cif files. + +While many packages for handling cif files exist, the vast majority suffer +from decades of feature creep and high levels of complexity. ``parsnip`` provides a +simple and minimal interface for reading cif files into Python primitive data structures +and numpy arrays. The ``parsnip.parse`` module contains exactly two functions that read +key-value and tabular data from cif files, and are all that are required for most users. +The ``parsnip.patterns`` module includes a few convenience features for manipulation of +the read data, and the ``parsnip.unitcells`` module includes functions to reconstruct a +crystal's unit cell's basis positions from data stored in cif files. +""" +from . import parse, patterns, unitcells __version__ = "0.0.2" diff --git a/parsnip/_utils.py b/parsnip/_utils.py index bda30550..b2bb4513 100644 --- a/parsnip/_utils.py +++ b/parsnip/_utils.py @@ -9,3 +9,16 @@ def _str2num(val: str): def _deg2rad(val: float): """Convert a value in degrees to one in radians.""" return val * np.pi / 180 + + +def _get_distances(positions: np.ndarray): + # Get all index pairs with i < j (upper triangle, diagonal excluded) + i_indices, j_indices = np.triu_indices(len(positions), k=1) + + # Compute difference vectors + r_xyz = positions[i_indices] - positions[j_indices] + + # Compute squared distances from vectors (no square root is taken). + ij_distances = np.einsum("ij,ij->i", r_xyz, r_xyz, optimize="optimal") + + return ij_distances, i_indices, j_indices diff --git a/parsnip/parse.py b/parsnip/parse.py index ce6fd23a..cc571839 100644 --- a/parsnip/parse.py +++ b/parsnip/parse.py @@ -9,7 +9,7 @@ This is an example of a simple CIF file.
A `key`_ (data name or tag) must start with an underscore, and is seperated from the data value with whitespace characters. A `table`_ begins with the ``loop_`` keyword, and contain a header block and a data - block. The vertical position of a tag in the table heading corresponds with the + block. The vertical position of a tag in the table headings corresponds with the horizontal position of the associated column in the table values. .. code-block:: text @@ -49,7 +49,7 @@ import numpy as np from ._errors import ParseError, ParseWarning -from ._utils import _deg2rad, _str2num +from ._utils import _str2num from .patterns import LineCleaner, cast_array_to_float, remove_nondelimiting_whitespace @@ -84,7 +84,7 @@ def read_table( nondelimiting_whitespace_replacement (str, optional): Character to replace non-delimiting whitespaces with. Default value = ``"_"`` - regex_filter (tuple[str,str], optional): + regex_filter (tuple[str,str] | tuple[tuple[str,str]], optional): A tuple of strings that are compiled to a regex filter and applied to each data line. If a tuple of tuples of strings is provided instead, each pattern will be applied seperately. @@ -314,69 +314,3 @@ def read_key_value_pairs( ) return data - - -def read_cell_params(filename, degrees: bool = True, mmcif: bool = False): - r"""Read the cell lengths and angles from a CIF file. - - Args: - filename (str): The name of the .cif file to be parsed. - degrees (bool, optional): - When True, angles are returned in degrees (as per the cif spec). When False, - angles are converted to radians. - Default value = ``True`` - mmcif (bool, optional): - When False, the standard CIF key naming is used (e.g. _cell_angle_alpha). - When True, the mmCIF standard is used instead (e.g. cell.angle_alpha). - Default value = ``False`` - - Returns: - tuple: - The box vector lengths and angles in degrees or radians - :math:`(L_1, L_2, L_3, \alpha, \beta, \gamma)`. 
- """ - if mmcif: - angle_keys = ("_cell.angle_alpha", "_cell.angle_beta", "_cell.angle_gamma") - box_keys = ("_cell.length_a", "_cell.length_b", "_cell.length_c") + angle_keys - else: - angle_keys = ("_cell_angle_alpha", "_cell_angle_beta", "_cell_angle_gamma") - box_keys = ("_cell_length_a", "_cell_length_b", "_cell_length_c") + angle_keys - cell_data = read_key_value_pairs(filename, keys=box_keys, only_read_numerics=True) - - assert all(value is not None for value in cell_data.values()) - assert all(0 < cell_data[key] < 180 for key in angle_keys) - - if not degrees: - for key in angle_keys: - cell_data[key] = _deg2rad(cell_data[key]) - - return tuple(cell_data.values()) - - -def read_fractional_positions( - filename: str, - regex_filter: tuple = ((r",\s+", ",")), -): - r"""Extract the fractional X,Y,Z coordinates from a CIF file. - - Args: - filename (str): The name of the .cif file to be parsed. - regex_filter (tuple[tuple[str,str]], optional): - A tuple of strings that are compiled to a regex filter and applied to each - data line. Default value = ``((r",\s+",","))`` - - Returns: - :math:`(N, 3)` :class:`numpy.ndarray[np.float32]`: - Fractional X,Y,Z coordinates of the unit cell. - """ - xyz_keys = ("_atom_site_fract_x", "_atom_site_fract_y", "_atom_site_fract_z") - # Once #6 is added, we should warnings.catch_warnings(action="error") - xyz_data = read_table(filename=filename, keys=xyz_keys, regex_filter=regex_filter) - - xyz_data = cast_array_to_float(arr=xyz_data, dtype=np.float32) - - # Validate results - assert xyz_data.shape[1] == 3 - assert xyz_data.dtype == np.float32 - - return xyz_data diff --git a/parsnip/unitcells.py b/parsnip/unitcells.py new file mode 100644 index 00000000..8707ae2f --- /dev/null +++ b/parsnip/unitcells.py @@ -0,0 +1,273 @@ +"""Functions for constructing unit cells from CIF file data. 
+ +Rather than storing an entire unit cell's atomic positions, CIF files instead include +the data required to recreate those positions based on symmetry rules. Symmetry +operations (stored as strings of x,y,z position permutations) are applied to the Wyckoff +(symmetry irreducible) positions to create a list of possible atomic sites. These are +then wrapped into the unit cell and filtered for uniqueness to yield the final crystal. + +""" +from __future__ import annotations + +import re +import warnings + +import numpy as np + +from parsnip._errors import ParseWarning +from parsnip._utils import _deg2rad +from parsnip.parse import read_key_value_pairs, read_table +from parsnip.patterns import cast_array_to_float + + +def _matrix_from_lengths_and_angles(l1, l2, l3, alpha, beta, gamma): + a1 = np.array([l1, 0, 0]) + a2 = np.array([l2 * np.cos(gamma), l2 * np.sin(gamma), 0]) + a3x = np.cos(beta) + a3y = (np.cos(alpha) - np.cos(beta) * np.cos(gamma)) / np.sin(gamma) + under_sqrt = 1 - a3x**2 - a3y**2 + if under_sqrt < 0: + raise ValueError("The provided angles can not form a valid box.") + a3z = np.sqrt(under_sqrt) + a3 = np.array([l3 * a3x, l3 * a3y, l3 * a3z]) + return np.array([a1, a2, a3]) + + +def read_wyckoff_positions( + filename: str, + regex_filter: tuple[tuple[str, str]] | None = ((r",\s+", ",")), +): + r"""Extract the symmetry-irreducible, fractional X,Y,Z coordinates from a CIF file. + + Args: + filename (str): The name of the .cif file to be parsed. + regex_filter (tuple[tuple[str]], optional): + A tuple of strings that are compiled to a regex filter and applied to each + data line. Default value = ``((r",\s+", ","))`` + + Returns: + :math:`(N, 3)` :class:`numpy.ndarray[np.float64]`: + Fractional X,Y,Z coordinates of the unit cell.
+ """ + xyz_keys = ("_atom_site_fract_x", "_atom_site_fract_y", "_atom_site_fract_z") + xyz_data = read_table( + filename=filename, + keys=xyz_keys, + nondelimiting_whitespace_replacement="", + regex_filter=regex_filter, + ) + xyz_data = cast_array_to_float(arr=xyz_data, dtype=np.float64) + assert xyz_data.shape[1] == 3 + + return xyz_data + + +def read_symmetry_operations( + filename, + regex_filter: tuple[tuple[str, str]] | None = None, +): + r"""Extract the symmetry operations from a CIF file. + + Args: + filename (str): The name of the .cif file to be parsed. + regex_filter (tuple[tuple[str]], optional): + A tuple of strings that are compiled to a regex filter and applied to each + data line. Default value = ``None`` + + Returns: + :math:`(N,)` :class:`numpy.ndarray[str]`: + Symmetry operations as strings. + """ + symmetry_keys = ( + "_symmetry_equiv_pos_as_xyz", + "_space_group_symop_operation_xyz", + ) + + # Only one of the two keys will be matched. We can safely ignore that warning. + warnings.filterwarnings("ignore", "Keys {'_", category=ParseWarning) + data = read_table( + filename=filename, + keys=symmetry_keys, + regex_filter=regex_filter, + nondelimiting_whitespace_replacement="", + ) + + return data + + +def read_cell_params(filename, degrees: bool = True, mmcif: bool = False): + r"""Read the cell lengths and angles from a CIF file. + + Args: + filename (str): The name of the .cif file to be parsed. + degrees (bool, optional): + When True, angles are returned in degrees (as per the cif spec). When False, + angles are converted to radians. + Default value = ``True`` + mmcif (bool, optional): + When False, the standard CIF key naming is used (e.g. _cell_angle_alpha). + When True, the mmCIF standard is used instead (e.g. cell.angle_alpha). + Default value = ``False`` + + Returns: + tuple: + The box vector lengths and angles in degrees or radians + :math:`(L_1, L_2, L_3, \alpha, \beta, \gamma)`. 
+ """ + if mmcif: + angle_keys = ("_cell.angle_alpha", "_cell.angle_beta", "_cell.angle_gamma") + box_keys = ("_cell.length_a", "_cell.length_b", "_cell.length_c") + angle_keys + else: + angle_keys = ("_cell_angle_alpha", "_cell_angle_beta", "_cell_angle_gamma") + box_keys = ("_cell_length_a", "_cell_length_b", "_cell_length_c") + angle_keys + cell_data = read_key_value_pairs(filename, keys=box_keys, only_read_numerics=True) + + assert all(value is not None for value in cell_data.values()) + assert all( + 0 < cell_data[key] < 180 for key in angle_keys + ), "Read cell params were not in the expected range (0 < angle < 180 degrees)." + + if not degrees: + for key in angle_keys: + cell_data[key] = _deg2rad(cell_data[key]) + + return tuple(cell_data.values()) + + +def _safe_eval(str_input: str, x: int | float, y: int | float, z: int | float): + """Attempt to safely evaluate a string of symmetry equivalent positions. + + Python's ``eval`` is notoriously unsafe. While we could evaluate the entire list at + once, doing so carries some risk. The typical alternative, ``ast.literal_eval``, + does not work because we need to evaluate mathematical operations. + + We first replace the x,y,z values with ordered fstring inputs, to simplify the input + of fractional coordinate data. This is done for convenience more than security. + + Once we substitute in the x,y,z values, we should have a string version of a list + containing only numerics and math operators. We apply a substitution to ensure this + is the case, then perform one final check. If it passes, we evaluate the list. Note + that __builtins__ is set to {}, meaning importing functions is not possible. The + __locals__ dict is also set to {}, so no variables are accessible in the evaluation. + + I cannot guarantee this is fully safe, but it at the very least makes it extremely + difficult to do any funny business. + + Args: + str_input (str): String to be evaluated. + x (int|float): Fractional coordinate in :math:`x`.
+ y (int|float): Fractional coordinate in :math:`y`. + z (int|float): Fractional coordinate in :math:`z`. + + Returns: + list[list[int|float,int|float,int|float]]: + :math:`(N,3)` list of fractional coordinates. + + """ + ordered_inputs = {"x": "{0:.20f}", "y": "{1:.20f}", "z": "{2:.20f}"} + # Replace any x, y, or z with the same character surrounded by curly braces. Then, + # perform substitutions to insert the actual values. + substituted_string = ( + re.sub(r"([xyz])", r"{\1}", str_input).format(**ordered_inputs).format(x, y, z) + ) + + # Remove any unexpected characters from the string. + safe_string = re.sub(r"[^\d\[\]\,\+\-\/\*\.]", "", substituted_string) + # Double check to be sure: + assert all(char in ",.0123456789+-/*[]" for char in safe_string), ( + "Evaluation aborted. Check that symmetry operation string only contains " + "numerics or characters in { [],.+-/ } and adjust `regex_filter` param " + "accordingly." + ) + return eval(safe_string, {"__builtins__": {}}, {}) # noqa: S307 + + +def _write_debug_output(unique_indices, unique_counts, pos, check="Initial"): + print(f"{check} uniqueness check:") + if len(unique_indices) == len(pos): + print("... all points are unique (within tolerance).") + else: + print("(duplicate point, number of occurences)") + [ + print(pt, count) + for pt, count in zip(pos[unique_indices], unique_counts) + if count > 1 + ] + + print() + + +def extract_atomic_positions( + filename: str, + fractional: bool = True, + n_decimal_places: int = 4, + verbose: bool = False, +): + """Reconstruct atomic positions from Wyckoff sites and symmetry operations. + + .. warning:: + + Reconstructing positions requires several floating point calculations that can + be impacted by low-precision data in CIF files. Typically, at least four decimal + places are required to accurately reconstruct complicated unit cells: less + precision than this can yield cells with duplicate or missing positions. 
+ + Args: + filename (str): The name of the .cif file to be parsed. + fractional (bool, optional): + Whether to return fractional or absolute coordinates. + Default value = ``True`` + n_decimal_places (int, optional): + The number of decimal places to round each position to for the uniqueness + comparison. Values higher than 4 may not work for all CIF files. + Default value = ``4`` + verbose (bool, optional): + Whether to print debug information about the uniqueness checks. + Default value = ``False`` + + Returns: + :math:`(N, 3)` :class:`numpy.ndarray[np.float32]`: + The full unit cell of the crystal structure. + """ + fractional_positions = read_wyckoff_positions(filename=filename) + + # Read the cell params and convert to a matrix of basis vectors + cell = read_cell_params(filename, degrees=False, mmcif=False) + cell_matrix = _matrix_from_lengths_and_angles(*cell) + + symops = read_symmetry_operations(filename) + symops_str = np.array2string( + symops, + separator=",", # Place a comma after each line in the array. Required for eval + threshold=np.inf, # Ensure that every line is included in the string + floatmode="unique", # Ensures strings can uniquely represent each float number + ) + + all_frac_positions = [_safe_eval(symops_str, *xyz) for xyz in fractional_positions] + + pos = np.vstack(all_frac_positions) + pos %= 1 # Wrap particles into the box + + # Filter unique points.
This takes some time, but makes the method faster overall + _, unique_indices, unique_counts = np.unique( + pos.round(n_decimal_places), return_index=True, return_counts=True, axis=0 + ) + + if verbose: + _write_debug_output(unique_indices, unique_counts, pos, check="Initial") + + # Remove initial duplicates, then map to real space for a second check + pos = pos[unique_indices] + real_space_positions = pos @ cell_matrix + + _, unique_indices, unique_counts = np.unique( + real_space_positions.round(n_decimal_places), + return_index=True, + return_counts=True, + axis=0, + ) + + if verbose: + _write_debug_output(unique_indices, unique_counts, pos, check="Secondary") + + return pos[unique_indices] if fractional else real_space_positions[unique_indices] diff --git a/pyproject.toml b/pyproject.toml index 2fb22f24..eac25868 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta" name = "parsnip" version = "0.0.2" requires-python = ">=3.6" -description = "Minimal library for parsing CIF/mmCIF files in Python." +description = "Minimal library for parsing CIF & mmCIF files in Python."
readme = "README.md" license = { file = "LICENSE" } authors = [ diff --git a/setup.py b/setup.py deleted file mode 100644 index 1c7fac87..00000000 --- a/setup.py +++ /dev/null @@ -1,4 +0,0 @@ -# ruff: noqa: D100 -from setuptools import setup - -setup(name="parsnip") diff --git a/tests/conftest.py b/tests/conftest.py index 3bdf6017..13411cf8 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -54,7 +54,7 @@ def random_keys_mark(n_samples=10): aflow_mC24 = CifData( filename=data_file_path + "AFLOW_mC24.cif", - symop_keys=("_space_group_symop_id", "_space_group_symop_operation_xyz"), + symop_keys=("_space_group_symop_operation_xyz",), atom_site_keys=atom_site_keys, single_value_keys=( "_audit_creation_method", @@ -71,7 +71,7 @@ def random_keys_mark(n_samples=10): bisd_Ccmm = CifData( filename=data_file_path + "B-IncStrDb_Ccmm.cif", - symop_keys=("_space_group_symop_operation_xyz", "_space_group_symop_id"), + symop_keys=("_space_group_symop_operation_xyz",), # Our code works with extra keys, but gemmi does not! 
atom_site_keys=(atom_site_keys[0], *atom_site_keys[2:]), single_value_keys=( diff --git a/tests/requirements-legacy.in b/tests/requirements-legacy.in deleted file mode 100644 index c9e5797c..00000000 --- a/tests/requirements-legacy.in +++ /dev/null @@ -1,2 +0,0 @@ -gemmi==0.6.3 -pytest==7.0.1 diff --git a/tests/requirements-legacy.txt b/tests/requirements-legacy.txt deleted file mode 100644 index b101dc09..00000000 --- a/tests/requirements-legacy.txt +++ /dev/null @@ -1,24 +0,0 @@ -# -# This file is autogenerated by pip-compile with python 3.6 -# To update, run: -# -# pip-compile requirements-legacy.in -# -attrs==22.2.0 - # via pytest -gemmi==0.6.3 - # via -r tests/requirements-legacy.in -iniconfig==1.1.1 - # via pytest -packaging==21.3 - # via pytest -pluggy==1.0.0 - # via pytest -py==1.11.0 - # via pytest -pyparsing==3.1.2 - # via packaging -pytest==7.0.1 - # via -r tests/requirements-legacy.in -tomli==1.2.3 - # via pytest diff --git a/tests/requirements.in b/tests/requirements.in index f55e92da..2ba72ed5 100644 --- a/tests/requirements.in +++ b/tests/requirements.in @@ -1,2 +1,3 @@ -gemmi==0.6.6 -pytest==8.2.1 +ase +gemmi +pytest diff --git a/tests/requirements.txt b/tests/requirements.txt deleted file mode 100644 index d4911e87..00000000 --- a/tests/requirements.txt +++ /dev/null @@ -1,16 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.12 -# by the following command: -# -# pip-compile --strip-extras requirements.in -# -gemmi==0.6.6 - # via -r tests/requirements.in -iniconfig==2.0.0 - # via pytest -packaging==24.0 - # via pytest -pluggy==1.5.0 - # via pytest -pytest==8.2.1 - # via -r tests/requirements.in diff --git a/tests/sample_data/B-IncStrDb_Ccmm.cif b/tests/sample_data/B-IncStrDb_Ccmm.cif index b9cb8e90..f12587b2 100644 --- a/tests/sample_data/B-IncStrDb_Ccmm.cif +++ b/tests/sample_data/B-IncStrDb_Ccmm.cif @@ -57,7 +57,7 @@ _cell_volume 548.2 _cell_formula_units_Z 4 _space_group_crystal_system orthorhombic -_space_group_name_H-M_alt 
'C c m m' +_space_group_name_H-M_alt 'C m c m' loop_ _space_group_symop_id diff --git a/tests/test_key_reader.py b/tests/test_key_reader.py index f490b56c..5c7cbe4b 100644 --- a/tests/test_key_reader.py +++ b/tests/test_key_reader.py @@ -1,10 +1,10 @@ import numpy as np import pytest -from conftest import bad_cif, box_keys, cif_files_mark, random_keys_mark +from conftest import bad_cif, cif_files_mark, random_keys_mark from gemmi import cif from parsnip._errors import ParseWarning -from parsnip.parse import read_cell_params, read_key_value_pairs +from parsnip.parse import read_key_value_pairs def _gemmi_read_keys(filename, keys, as_number=True): @@ -59,13 +59,3 @@ def test_read_key_value_pairs_badcif(cif_data=bad_cif): def test_key_value_warnings(cif_data, keys=("_FALSE_KEY")): with pytest.warns(ParseWarning): _ = read_key_value_pairs(filename=cif_data.filename, keys=keys) - - -@cif_files_mark -def test_read_cell_params(cif_data, keys=box_keys): - mmcif = "PDB_4INS_head.cif" in cif_data.filename - parsnip_data = read_cell_params(filename=cif_data.filename, mmcif=mmcif) - if mmcif: - keys = (key[0] + key[1:].replace("_", ".", 1) for key in keys) - gemmi_data = _gemmi_read_keys(cif_data.filename, keys) - np.testing.assert_array_equal(parsnip_data, gemmi_data) diff --git a/tests/test_table_reader.py b/tests/test_table_reader.py index 9a883b9d..76aba8d7 100644 --- a/tests/test_table_reader.py +++ b/tests/test_table_reader.py @@ -4,7 +4,7 @@ from gemmi import cif from parsnip._errors import ParseWarning -from parsnip.parse import read_fractional_positions, read_table +from parsnip.parse import read_table def _gemmi_read_table(filename, keys): @@ -109,14 +109,3 @@ def test_bad_cif_atom_sites(cif_data=bad_cif): np.testing.assert_array_equal( parsnip_data[:, 4], ["0.25000", "0.(28510)", "0.05170", "0.41220"] ) - - -@cif_files_mark -def test_read_fractional_positions(cif_data): - if "PDB_4INS_head.cif" in cif_data.filename: - return - keys = ("_atom_site_fract_x", 
"_atom_site_fract_y", "_atom_site_fract_z") - parsnip_data = read_fractional_positions(filename=cif_data.filename) - gemmi_data = _gemmi_read_table(cif_data.filename, keys) - gemmi_data = [[cif.as_number(val) for val in row] for row in gemmi_data] - np.testing.assert_allclose(parsnip_data, gemmi_data) diff --git a/tests/test_unitcells.py b/tests/test_unitcells.py new file mode 100644 index 00000000..eadfba5b --- /dev/null +++ b/tests/test_unitcells.py @@ -0,0 +1,92 @@ +import numpy as np +import pytest +from conftest import box_keys, cif_files_mark +from gemmi import cif + +from parsnip.unitcells import ( + extract_atomic_positions, + read_cell_params, + read_symmetry_operations, + read_wyckoff_positions, +) + + +def _gemmi_read_table(filename, keys): + return np.array(cif.read_file(filename).sole_block().find(keys)) + + +def _gemmi_read_keys(filename, keys, as_number=True): + file_block = cif.read_file(filename).sole_block() + if as_number: + return np.array([cif.as_number(file_block.find_value(key)) for key in keys]) + else: + return np.array([file_block.find_value(key) for key in keys]) + + +@cif_files_mark +def test_read_wyckoff_positions(cif_data): + if "PDB_4INS_head.cif" in cif_data.filename: + return + keys = ("_atom_site_fract_x", "_atom_site_fract_y", "_atom_site_fract_z") + parsnip_data = read_wyckoff_positions(filename=cif_data.filename) + gemmi_data = _gemmi_read_table(cif_data.filename, keys) + gemmi_data = [[cif.as_number(val) for val in row] for row in gemmi_data] + np.testing.assert_allclose(parsnip_data, gemmi_data) + + +@cif_files_mark +def test_read_cell_params(cif_data, keys=box_keys): + mmcif = "PDB_4INS_head.cif" in cif_data.filename + parsnip_data = read_cell_params(filename=cif_data.filename, mmcif=mmcif) + if mmcif: + keys = (key[0] + key[1:].replace("_", ".", 1) for key in keys) + gemmi_data = _gemmi_read_keys(cif_data.filename, keys) + np.testing.assert_array_equal(parsnip_data, gemmi_data) + + +@cif_files_mark +def 
test_read_symmetry_operations(cif_data): + if "PDB_4INS_head.cif" in cif_data.filename: + return + + parsnip_data = read_symmetry_operations(filename=cif_data.filename) + gemmi_data = _gemmi_read_table(filename=cif_data.filename, keys=cif_data.symop_keys) + # We clean up the data for easier processing: apply the same transformation to gemmi + gemmi_data = [[item.replace(" ", "") for item in row] for row in gemmi_data] + np.testing.assert_array_equal(parsnip_data, gemmi_data) + + +@cif_files_mark +@pytest.mark.parametrize("n_decimal_places", [3, 4, 5]) +def test_extract_atomic_positions(cif_data, n_decimal_places): + import warnings + + from ase import io + from ase.build import supercells + + warnings.filterwarnings("ignore", "crystal system", category=UserWarning) + + if "PDB_4INS_head.cif" in cif_data.filename: + pytest.skip("Function not compatible with PDB data.") + + parsnip_positions = extract_atomic_positions( + filename=cif_data.filename, n_decimal_places=n_decimal_places, fractional=False + ) + + # Read the structure, then extract to Python builtin types. Then, wrap into the box + ase_file = io.read(cif_data.filename) + ase_data = supercells.make_supercell(ase_file, np.diag([1, 1, 1])) + + # Arrays must be sorted to guarantee correct comparison + parsnip_positions = np.array( + sorted(parsnip_positions.round(14), key=lambda x: (x[0], x[1], x[2])) + ) + ase_positions = np.array( + sorted(ase_data.get_positions(), key=lambda x: (x[0], x[1], x[2])) + ) + + parsnip_minmax = [parsnip_positions.min(axis=0), parsnip_positions.max(axis=0)] + ase_minmax = [ase_positions.min(axis=0), ase_positions.max(axis=0)] + np.testing.assert_allclose(parsnip_minmax, ase_minmax, atol=1e-6) + + np.testing.assert_allclose(parsnip_positions, ase_positions, atol=1e-12)