Skip to content
Merged
Changes from 1 commit
Commits
Show all changes
61 commits
Select commit Hold shift + click to select a range
656e9e1
Add _str2num and _deg2rad _utils
janbridley Apr 5, 2024
1e74eb7
Add cif file keys list to sample data
janbridley Apr 5, 2024
c369fd1
Add key_value_pairs reader and cell_params reader to parse
janbridley Apr 5, 2024
672c4e3
Add tests for key reader
janbridley Apr 5, 2024
e0b693f
Add tests for new utils
janbridley Apr 5, 2024
79350fc
Reorder test_key_reader
janbridley Apr 5, 2024
04b3344
Improve documentation for regex
janbridley Apr 5, 2024
b59eab1
Add warnings and tests to read_key_value_pairs
janbridley Apr 5, 2024
87303b9
Restore trailing spaces to downloaded CIF files
janbridley Apr 8, 2024
90120c7
Properly track keys containing "-"
janbridley Apr 8, 2024
d4203da
Improved tests for key value pair reader
janbridley Apr 8, 2024
8c3c014
Add key-value tests for INTENTIONALLY_BAD_CIF.cif
janbridley Apr 8, 2024
9c91bde
Fix docs
janbridley Apr 8, 2024
9aaba90
Enable top of page button
janbridley Apr 8, 2024
6ea7882
Update brand primary colors
janbridley Apr 8, 2024
0169783
Improve docs for parse.py
janbridley Apr 8, 2024
a404d19
Add __future__.annotations imports to relevant files
janbridley Apr 9, 2024
4903f80
Fix typo
janbridley Apr 10, 2024
a333c5c
Seperate _errors from _templates
janbridley Apr 10, 2024
b0f386b
Clean up docstring return types
janbridley Apr 10, 2024
96acd85
Add PDB cif to test suite
janbridley Apr 10, 2024
a6ebf33
Fix test in test_key_reader
janbridley Apr 10, 2024
f8dbaa3
Clean up patterns.py and add remove_nondelimiting_whitespace
janbridley Apr 10, 2024
b1e0bdd
Update table_reader to use remove_nondelimiting_whitespace
janbridley Apr 10, 2024
51328be
Allow value reader to read mmCIF files
janbridley Apr 10, 2024
06abb57
Update test_table_reader.py
janbridley Apr 10, 2024
98a2201
Remove seperate mmCIF reader
janbridley Apr 10, 2024
93909f8
Add docs for patterns module
janbridley Apr 10, 2024
d4d931b
Fix cast_to_float default value
janbridley Apr 10, 2024
1d86db9
Update docs
janbridley Apr 10, 2024
0528d36
Add documentation for __call__
janbridley Apr 10, 2024
40c7fb8
Update regex_filter param documentation
janbridley Apr 10, 2024
56c1e21
Fix typo
janbridley Apr 10, 2024
853a166
Remove unneeded comment
janbridley Apr 10, 2024
8b19268
Fix default values in docs
janbridley Apr 10, 2024
fd295a8
Fix typo
janbridley Apr 10, 2024
3e5e77c
Minor doc fix
janbridley Apr 10, 2024
ffa59a7
Fix typo
janbridley Apr 10, 2024
7f80005
Remove duplicate Introduction from index
janbridley Apr 10, 2024
1e8c01d
Remove duplicate entries from toc
janbridley Apr 10, 2024
56d80de
Add source for PDB cif
janbridley Apr 10, 2024
5d47d10
Add mmCIF flag to read_cell_params
janbridley Apr 10, 2024
dfbf5ed
Add quickstart.rst
janbridley Apr 10, 2024
28a7025
Fix comment in quickstart
janbridley Apr 10, 2024
e60cd1b
Remove unnecessary line in quickstart
janbridley Apr 10, 2024
6e82566
Fix image path in README.rst
janbridley Apr 11, 2024
a772261
Update regex documentation
janbridley Apr 11, 2024
7d03311
Fix CI
janbridley Apr 11, 2024
bbe6426
Documentation fix
janbridley Apr 25, 2024
836f465
Documentation fix for regex filter
janbridley Apr 25, 2024
81a2f98
Comment fixes
janbridley Apr 25, 2024
d506df3
Fix #8
janbridley Apr 25, 2024
975f9b0
Fix typo in _parsed_line_generator docs
janbridley Apr 25, 2024
0c48708
Typo fix
janbridley Apr 25, 2024
8eb5f2e
Move tip block comment
janbridley Apr 25, 2024
343dc90
Merge branch 'main' into feature/read-values
janbridley May 13, 2024
d71119a
Untrack cif files from end-of-file-fixer
janbridley May 16, 2024
cc23120
Merge remote-tracking branch 'origin/main' into feature/read-value
janbridley May 22, 2024
75f8c3e
Add missing key to CifData namedtuple
janbridley May 22, 2024
855f2c5
Remove __future__ annotations
janbridley May 22, 2024
60fcbe7
Remove type | type
janbridley May 22, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Update table_reader to use remove_nondelimiting_whitespace
  • Loading branch information
janbridley committed Apr 10, 2024
commit b1e0bddf26a9fe4ad4871454fa6c6d4b7c2366af
30 changes: 24 additions & 6 deletions parsnip/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@

from ._errors import ParseError, ParseWarning
from ._utils import _deg2rad, _str2num
from .patterns import LineCleaner, cast_array_to_float
from .patterns import LineCleaner, cast_array_to_float, remove_nondelimiting_whitespace


def _remove_comments_from_line(line):
Expand All @@ -63,22 +63,33 @@ def _remove_comments_from_line(line):
def read_table(
filename: str,
keys: str,
filter_line: tuple[str, str] = (r",\s+", ","),
nondelimiting_whitespace_replacement: str = "_",
regex_filter: tuple[str, str] | None = None,
keep_original_key_order=False,
) -> np.ndarray[str]:
r"""Extract data from a CIF file loop\_ table.

.. tip::

CIF tables are whitespace delimited - however, values enclosed in quotation
marks may also contain whitespace characters. The parameter
``nondelimiting_whitespace_replacement`` handles this possibility by replacing
nondelimiting whitespaces with underscores. This value can also be set to an
empty string, or any arbitrary sequence of characters.

Args:
filename (str):
The name of the .cif file to be parsed.
keys (tuple[str]):
The names of the keys to be parsed. The columns associated with these keys
will be returned in the final array.
filter_line (tuple[str,str], optional):
nondelimiting_whitespace_replacement (str, optional):
Character to replace non-delimiting whitespaces with. Default value = ``"_"``
regex_filter (tuple[str,str], optional):
A tuple of strings that are compiled to a regex filter and applied to each
data line. If a tuple of tuples of strings is provided instead, each pattern
will be applied separately.
Default value = ``((r",\s+",","))``
Default value = ``None``
keep_original_key_order (bool, optional):
When True, preserve the order of keys in the table from the cif file.
When False, return columns of data in order of the input ``keys`` arg.
Expand Down Expand Up @@ -109,7 +120,8 @@ def read_table(
with open(filename) as f:
tables = f.read().split("loop_")

line_cleaner = LineCleaner(filter_line)
if regex_filter is not None:
line_cleaner = LineCleaner(regex_filter)
nontable_line_prefixes = ("_", "#")

for table in tables:
Expand Down Expand Up @@ -141,7 +153,13 @@ def read_table(
# If we exit the header and enter the table body
if data_column_indices and (line[:1] not in nontable_line_prefixes):
in_header = False # Exit the header and start writing data
clean_line = line_cleaner(line)

if regex_filter is not None: # Apply user-defined regex, if present
line = line_cleaner(line)

clean_line = remove_nondelimiting_whitespace(
line.strip(), replacement=nondelimiting_whitespace_replacement
)
split_line = clean_line.split()

# Only add data if the line has at least as many columns as required.
Expand Down