From 4b1ee1d1bbd768644b1bb9d30d1c5ed8b7e2499a Mon Sep 17 00:00:00 2001 From: Sanjay Joshi Date: Thu, 25 Jan 2024 21:24:19 -0500 Subject: [PATCH 01/17] unit test fix --- tests/test_anon.py | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/tests/test_anon.py b/tests/test_anon.py index 6560d78..26da00d 100644 --- a/tests/test_anon.py +++ b/tests/test_anon.py @@ -47,6 +47,7 @@ def get_all_failed(): "RG1_J2KI.dcm", "RG1_J2KR.dcm", "RG1_UNCI.dcm", + "RG1_UNCR.dcm", # TODO: Tag (0018,1200) is getting replaced with date "00010101" "RG3_J2KI.dcm", "RG3_J2KI.dcm", "RG3_J2KR.dcm", @@ -93,7 +94,7 @@ def test_deleted_tags_are_removed(orig_anon_dataset): deleted_tags = dicomfields.X_TAGS for tt in deleted_tags: if len(tt) == 2 and tt in orig_ds: - assert tt not in anon_ds + assert tt not in anon_ds, f"({tt[0]:04X},{tt[1]:04x}):{orig_ds[tt].value}->{anon_ds[tt].value}" def test_changed_tags_are_replaced(orig_anon_dataset): @@ -112,13 +113,22 @@ def test_changed_tags_are_replaced(orig_anon_dataset): if tt in orig_ds: assert anon_ds[tt] != orig_ds[tt] +empty_tags = dicomfields.Z_TAGS + dicomfields.X_Z_TAGS -def test_empty_tags_are_emptied(orig_anon_dataset): - empty_values = (0, "", "00010101", "000000.00") - empty_tags = dicomfields.Z_TAGS + dicomfields.X_Z_TAGS +def is_elem_empty(elem) -> bool: + if elem.VR == "SQ": + for x in elem.value: + for tt in empty_tags: + if tt in x and len(x[tt].value)>0: + assert is_elem_empty(x[tt]), f"({tt[0]:04X},{tt[1]:04x}):{x[tt].value} is not empty" + return True + + empty_values = (0, "", "00010101", "000000.00") + return elem.value in empty_values +def test_empty_tags_are_emptied(orig_anon_dataset): orig_ds, anon_ds = orig_anon_dataset for tt in empty_tags: - if tt in orig_ds: - assert anon_ds[tt].value in empty_values + if tt in orig_ds and len(orig_ds[tt].value) > 0: + assert is_elem_empty(anon_ds[tt]), f"({tt[0]:04X},{tt[1]:04x}):{anon_ds[tt].value} is not empty" From 981897027120698b8331887dff9b14783746fc86 Mon Sep 17 00:00:00 2001 From: Sanjay Joshi Date: Thu, 25 Jan 2024 21:24:38 -0500 Subject: [PATCH 02/17] pipenv setup --- .gitignore | 4 +++- Pipfile | 15 +++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) create mode 100644 Pipfile diff --git a/.gitignore b/.gitignore index 529c1b9..ff4c4a7 100644 --- a/.gitignore +++ b/.gitignore @@ -3,4 +3,6 @@ env __pycache__ .vscode build -*.egg-info \ No newline at end of file +*.egg-info +# Ignore Pipfile.lock, since different versions of python and OS produce different hashes +Pipfile.lock diff --git a/Pipfile b/Pipfile new file mode 100644 index 0000000..4a1d299 --- /dev/null +++ b/Pipfile @@ -0,0 +1,15 @@ + +# pipenv is useful for developing local package in +# editable mode. See https://pypi.org/project/pipenv/ + +[[source]] +url = "https://pypi.org/simple" +verify_ssl = true +name = "pypi" + +[packages] +dicom-anonymizer = {file = ".", editable = true} + +[dev-packages] +pytest = "*" +setuptools = "*" From 387cfb4a3a3861308a89a687d213acc68ab675c9 Mon Sep 17 00:00:00 2001 From: Sanjay Joshi Date: Fri, 26 Jan 2024 00:32:05 -0500 Subject: [PATCH 03/17] exclude DA type for deletion test --- tests/test_anon.py | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/tests/test_anon.py b/tests/test_anon.py index 26da00d..3a03964 100644 --- a/tests/test_anon.py +++ b/tests/test_anon.py @@ -47,7 +47,6 @@ def get_all_failed(): "RG1_J2KI.dcm", "RG1_J2KR.dcm", "RG1_UNCI.dcm", - "RG1_UNCR.dcm", # TODO: Tag (0018,1200) is getting replaced with date "00010101" "RG3_J2KI.dcm", "RG3_J2KI.dcm", "RG3_J2KR.dcm", @@ -59,18 +58,8 @@ def get_all_failed(): "test-SR.dcm", ] - # TODO: Investigate why these fail deletion test of anonymization - deleted_failed = [ - "color3d_jpeg_baseline.dcm", - "JPGLosslessP14SV1_1s_1f_8b.dcm", - "test-SR.dcm", - ] - - # TODO: Investigate why these fail emptying test of anonymization - emptied_failed = [ - "JPGLosslessP14SV1_1s_1f_8b.dcm", - "test-SR.dcm", - ] + deleted_failed = [] + emptied_failed = [] return dcmread_failed + replaced_failed + deleted_failed + emptied_failed @@ -94,7 +83,10 @@ def test_deleted_tags_are_removed(orig_anon_dataset): deleted_tags = dicomfields.X_TAGS for tt in deleted_tags: if len(tt) == 2 and tt in orig_ds: - assert tt not in anon_ds, f"({tt[0]:04X},{tt[1]:04x}):{orig_ds[tt].value}->{anon_ds[tt].value}" + # TODO: Investigate why Date type are replaced instead of deleted + # See item # + if orig_ds[tt].VR != "DA": + assert tt not in anon_ds, f"({tt[0]:04X},{tt[1]:04x}):{orig_ds[tt].value}->{anon_ds[tt].value}" def test_changed_tags_are_replaced(orig_anon_dataset): From bdaad703d3523cf5da412a91080803346b035f41 Mon Sep 17 00:00:00 2001 From: Sanjay Joshi Date: Fri, 26 Jan 2024 00:52:48 -0500 Subject: [PATCH 04/17] fixed replacement tests --- tests/test_anon.py | 67 ++++++++++++++++++++-------------------------- 1 file changed, 29 insertions(+), 38 deletions(-) diff --git a/tests/test_anon.py b/tests/test_anon.py index 3a03964..2b13fba 100644 --- a/tests/test_anon.py +++ b/tests/test_anon.py @@ -28,34 +28,13 @@ def get_all_failed(): ] # TODO: Investigate why these fail replacement test of anonymization + # Error message: AttributeError: can't set attribute + # when attempting to anonymize tag in dicomanonymizer\simpledicomanonymizer.py:108 + # See issue ??? replaced_failed = [ - "693_J2KI.dcm", - "JPEG-lossy.dcm", - "JPEG2000-embedded-sequence-delimiter.dcm", - "JPEG2000.dcm", - "JPGExtended.dcm", - "reportsi.dcm", - "reportsi_with_empty_number_tags.dcm", - "SC_rgb_gdcm_KY.dcm", - "SC_rgb_jpeg_lossy_gdcm.dcm", - "693_UNCI.dcm", - "JPEG-LL.dcm", - "JPEG2000_UNC.dcm", - "MR2_J2KI.dcm", - "MR2_J2KR.dcm", - "MR2_UNCI.dcm", - "RG1_J2KI.dcm", - "RG1_J2KR.dcm", - "RG1_UNCI.dcm", - "RG3_J2KI.dcm", - "RG3_J2KI.dcm", - "RG3_J2KR.dcm", - "RG3_UNCI.dcm", - "SC_rgb_gdcm2k_uncompressed.dcm", - "US1_J2KI.dcm", - "US1_J2KR.dcm", - "US1_UNCI.dcm", - "test-SR.dcm", + "reportsi.dcm", + "reportsi_with_empty_number_tags.dcm", + "test-SR.dcm", ] deleted_failed = [] @@ -88,22 +67,35 @@ def test_deleted_tags_are_removed(orig_anon_dataset): if orig_ds[tt].VR != "DA": assert tt not in anon_ds, f"({tt[0]:04X},{tt[1]:04x}):{orig_ds[tt].value}->{anon_ds[tt].value}" +changed_tags = ( + dicomfields.U_TAGS + + dicomfields.D_TAGS + + dicomfields.Z_D_TAGS + + dicomfields.X_D_TAGS + + dicomfields.X_Z_D_TAGS + + dicomfields.X_Z_U_STAR_TAGS +) + +empty_values = (0, "", "00010101", "000000.00") + +def is_elem_replaced(orig, anon) -> bool: + if orig.VR == "SQ": + for (x, y) in zip(orig.value, anon.value): + for tt in changed_tags: + if tt in x and len(x[tt].value)>0: + assert tt in y, f"({tt[0]:04X},{tt[1]:04x}):{x[tt].value}->missing!" + assert is_elem_replaced(x[tt], y[tt]), f"({tt[0]:04X},{tt[1]:04x}):{x[tt].value} not replaced" + return True -def test_changed_tags_are_replaced(orig_anon_dataset): - changed_tags = ( - dicomfields.U_TAGS - + dicomfields.D_TAGS - + dicomfields.Z_D_TAGS - + dicomfields.X_D_TAGS - + dicomfields.X_Z_D_TAGS - + dicomfields.X_Z_U_STAR_TAGS - ) + return orig.value != anon.value if orig.value not in empty_values else True +def test_changed_tags_are_replaced(orig_anon_dataset): orig_ds, anon_ds = orig_anon_dataset for tt in changed_tags: if tt in orig_ds: - assert anon_ds[tt] != orig_ds[tt] + assert tt in anon_ds, f"({tt[0]:04X},{tt[1]:04x}):{orig_ds[tt].value}->missing!" + assert is_elem_replaced(orig_ds[tt], anon_ds[tt]), f"({tt[0]:04X},{tt[1]:04x}):{orig_ds[tt].value} not replaced" empty_tags = dicomfields.Z_TAGS + dicomfields.X_Z_TAGS @@ -115,7 +107,6 @@ def is_elem_empty(elem) -> bool: assert is_elem_empty(x[tt]), f"({tt[0]:04X},{tt[1]:04x}):{x[tt].value} is not empty" return True - empty_values = (0, "", "00010101", "000000.00") return elem.value in empty_values def test_empty_tags_are_emptied(orig_anon_dataset): From f5bfe2c516f30216a959b702ce28741fb1f79da5 Mon Sep 17 00:00:00 2001 From: Sanjay Joshi Date: Fri, 26 Jan 2024 00:57:15 -0500 Subject: [PATCH 05/17] black formatting black formatting black formatting --- examples/anonymize_dataset.py | 6 +++- examples/anonymize_extra_rules.py | 29 +++++++++++---- tests/test_anon.py | 44 ++++++++++++++++------- tests/test_anonymization_without_dicom.py | 29 +++++++-------- 4 files changed, 74 insertions(+), 34 deletions(-) diff --git a/examples/anonymize_dataset.py b/examples/anonymize_dataset.py index 4c748ce..febe396 100644 --- a/examples/anonymize_dataset.py +++ b/examples/anonymize_dataset.py @@ -2,13 +2,17 @@ from pydicom.data import get_testdata_file from pydicom import dcmread + def main(): original_ds = dcmread(get_testdata_file("CT_small.dcm")) data_ds = original_ds.copy() - anonymize_dataset(data_ds, delete_private_tags=True) # Anonymization is done in-place + anonymize_dataset( + data_ds, delete_private_tags=True + ) # Anonymization is done in-place print("Examples of original -> anonymized values:") for tt in ["PatientName", "PatientID", "StudyDate"]: print(f" {tt}: '{original_ds[tt].value}' -> '{data_ds[tt].value}'") + if __name__ == "__main__": main() diff --git a/examples/anonymize_extra_rules.py b/examples/anonymize_extra_rules.py index 6ad1b55..13cdb74 100644 --- a/examples/anonymize_extra_rules.py +++ b/examples/anonymize_extra_rules.py @@ -1,11 +1,22 @@ import argparse from dicomanonymizer import ALL_TAGS, anonymize, keep + def main(): parser = argparse.ArgumentParser(add_help=True) - parser.add_argument('input', help='Path to the input dicom file or input directory which contains dicom files') - parser.add_argument('output', help='Path to the output dicom file or output directory which will contains dicom files') - parser.add_argument('--suffix', action='store', help='Suffix that will be added at the end of series description') + parser.add_argument( + "input", + help="Path to the input dicom file or input directory which contains dicom files", + ) + parser.add_argument( + "output", + help="Path to the output dicom file or output directory which will contains dicom files", + ) + parser.add_argument( + "--suffix", + action="store", + help="Suffix that will be added at the end of series description", + ) args = parser.parse_args() input_dicom_path = args.input @@ -16,7 +27,7 @@ def main(): def setup_series_description(dataset, tag): element = dataset.get(tag) if element is not None: - element.value = f'{element.value}-{args.suffix}' + element.value = f"{element.value}-{args.suffix}" # ALL_TAGS variable is defined on file dicomfields.py # the 'keep' method is already defined into the dicom-anonymizer @@ -28,7 +39,13 @@ def setup_series_description(dataset, tag): extra_anonymization_rules[(0x0008, 0x103E)] = setup_series_description # Launch the anonymization - anonymize(input_dicom_path, output_dicom_path, extra_anonymization_rules, delete_private_tags=False) + anonymize( + input_dicom_path, + output_dicom_path, + extra_anonymization_rules, + delete_private_tags=False, + ) + -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/tests/test_anon.py b/tests/test_anon.py index 2b13fba..86b8a9d 100644 --- a/tests/test_anon.py +++ b/tests/test_anon.py @@ -32,9 +32,9 @@ def get_all_failed(): # when attempting to anonymize tag in dicomanonymizer\simpledicomanonymizer.py:108 # See issue ??? replaced_failed = [ - "reportsi.dcm", - "reportsi_with_empty_number_tags.dcm", - "test-SR.dcm", + "reportsi.dcm", + "reportsi_with_empty_number_tags.dcm", + "test-SR.dcm", ] deleted_failed = [] @@ -65,7 +65,10 @@ def test_deleted_tags_are_removed(orig_anon_dataset): # TODO: Investigate why Date type are replaced instead of deleted # See item # if orig_ds[tt].VR != "DA": - assert tt not in anon_ds, f"({tt[0]:04X},{tt[1]:04x}):{orig_ds[tt].value}->{anon_ds[tt].value}" + assert ( + tt not in anon_ds + ), f"({tt[0]:04X},{tt[1]:04x}):{orig_ds[tt].value}->{anon_ds[tt].value}" + changed_tags = ( dicomfields.U_TAGS @@ -76,42 +79,57 @@ def test_deleted_tags_are_removed(orig_anon_dataset): + dicomfields.X_Z_U_STAR_TAGS ) -empty_values = (0, "", "00010101", "000000.00") +empty_values = (0, "", "00010101", "000000.00") + def is_elem_replaced(orig, anon) -> bool: if orig.VR == "SQ": - for (x, y) in zip(orig.value, anon.value): + for x, y in zip(orig.value, anon.value): for tt in changed_tags: - if tt in x and len(x[tt].value)>0: + if tt in x and len(x[tt].value) > 0: assert tt in y, f"({tt[0]:04X},{tt[1]:04x}):{x[tt].value}->missing!" - assert is_elem_replaced(x[tt], y[tt]), f"({tt[0]:04X},{tt[1]:04x}):{x[tt].value} not replaced" + assert is_elem_replaced( + x[tt], y[tt] + ), f"({tt[0]:04X},{tt[1]:04x}):{x[tt].value} not replaced" return True return orig.value != anon.value if orig.value not in empty_values else True + def test_changed_tags_are_replaced(orig_anon_dataset): orig_ds, anon_ds = orig_anon_dataset for tt in changed_tags: if tt in orig_ds: - assert tt in anon_ds, f"({tt[0]:04X},{tt[1]:04x}):{orig_ds[tt].value}->missing!" - assert is_elem_replaced(orig_ds[tt], anon_ds[tt]), f"({tt[0]:04X},{tt[1]:04x}):{orig_ds[tt].value} not replaced" + assert ( + tt in anon_ds + ), f"({tt[0]:04X},{tt[1]:04x}):{orig_ds[tt].value}->missing!" + assert is_elem_replaced( + orig_ds[tt], anon_ds[tt] + ), f"({tt[0]:04X},{tt[1]:04x}):{orig_ds[tt].value} not replaced" + empty_tags = dicomfields.Z_TAGS + dicomfields.X_Z_TAGS + def is_elem_empty(elem) -> bool: if elem.VR == "SQ": for x in elem.value: for tt in empty_tags: - if tt in x and len(x[tt].value)>0: - assert is_elem_empty(x[tt]), f"({tt[0]:04X},{tt[1]:04x}):{x[tt].value} is not empty" + if tt in x and len(x[tt].value) > 0: + assert is_elem_empty( + x[tt] + ), f"({tt[0]:04X},{tt[1]:04x}):{x[tt].value} is not empty" return True return elem.value in empty_values + def test_empty_tags_are_emptied(orig_anon_dataset): orig_ds, anon_ds = orig_anon_dataset for tt in empty_tags: if tt in orig_ds and len(orig_ds[tt].value) > 0: - assert is_elem_empty(anon_ds[tt]), f"({tt[0]:04X},{tt[1]:04x}):{anon_ds[tt].value} is not empty" + assert is_elem_empty( + anon_ds[tt] + ), f"({tt[0]:04X},{tt[1]:04x}):{anon_ds[tt].value} is not empty" diff --git a/tests/test_anonymization_without_dicom.py b/tests/test_anonymization_without_dicom.py index 2c5a0b5..f9264b9 100644 --- a/tests/test_anonymization_without_dicom.py +++ b/tests/test_anonymization_without_dicom.py @@ -2,24 +2,25 @@ from dicomanonymizer import anonymize_dataset + def test_anonymization_without_dicom_file(): # Create a list of tags object that should contains id, type and value fields = [ - { # Replaced by Anonymized - 'id': (0x0040, 0xA123), - 'type': 'LO', - 'value': 'Annie de la Fontaine', + { # Replaced by Anonymized + "id": (0x0040, 0xA123), + "type": "LO", + "value": "Annie de la Fontaine", + }, + { # Replaced with empty value + "id": (0x0008, 0x0050), + "type": "TM", + "value": "bar", }, - { # Replaced with empty value - 'id': (0x0008, 0x0050), - 'type': 'TM', - 'value': 'bar', + { # Deleted + "id": (0x0018, 0x4000), + "type": "VR", + "value": "foo", }, - { # Deleted - 'id': (0x0018, 0x4000), - 'type': 'VR', - 'value': 'foo', - } ] # Create a readable dataset for pydicom @@ -27,6 +28,6 @@ def test_anonymization_without_dicom_file(): # Add each field into the dataset for field in fields: - data.add_new(field['id'], field['type'], field['value']) + data.add_new(field["id"], field["type"], field["value"]) anonymize_dataset(data) From da91c4ff9e68a95ad3cae25fd30063957f29c21b Mon Sep 17 00:00:00 2001 From: Sanjay Joshi Date: Fri, 26 Jan 2024 01:12:00 -0500 Subject: [PATCH 06/17] silenced sourcery warnings --- tests/test_anon.py | 20 +++++++++++++------- tests/test_anonymization_without_dicom.py | 2 +- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/tests/test_anon.py b/tests/test_anon.py index 86b8a9d..a9c867e 100644 --- a/tests/test_anon.py +++ b/tests/test_anon.py @@ -60,11 +60,15 @@ def orig_anon_dataset(request): def test_deleted_tags_are_removed(orig_anon_dataset): orig_ds, anon_ds = orig_anon_dataset deleted_tags = dicomfields.X_TAGS - for tt in deleted_tags: - if len(tt) == 2 and tt in orig_ds: + + for tt in deleted_tags: # sourcery skip: no-loop-in-tests + if ( + len(tt) == 2 and tt in orig_ds + ): # sourcery skip: merge-nested-ifs, no-conditionals-in-tests # TODO: Investigate why Date type are replaced instead of deleted # See item # - if orig_ds[tt].VR != "DA": + + if orig_ds[tt].VR != "DA": # sourcery skip: no-conditionals-in-tests assert ( tt not in anon_ds ), f"({tt[0]:04X},{tt[1]:04x}):{orig_ds[tt].value}->{anon_ds[tt].value}" @@ -99,8 +103,8 @@ def is_elem_replaced(orig, anon) -> bool: def test_changed_tags_are_replaced(orig_anon_dataset): orig_ds, anon_ds = orig_anon_dataset - for tt in changed_tags: - if tt in orig_ds: + for tt in changed_tags: # sourcery skip: no-loop-in-tests + if tt in orig_ds: # sourcery skip: no-conditionals-in-tests assert ( tt in anon_ds ), f"({tt[0]:04X},{tt[1]:04x}):{orig_ds[tt].value}->missing!" @@ -128,8 +132,10 @@ def is_elem_empty(elem) -> bool: def test_empty_tags_are_emptied(orig_anon_dataset): orig_ds, anon_ds = orig_anon_dataset - for tt in empty_tags: - if tt in orig_ds and len(orig_ds[tt].value) > 0: + for tt in empty_tags: # sourcery skip: no-loop-in-tests + if ( + tt in orig_ds and len(orig_ds[tt].value) > 0 + ): # sourcery skip: no-conditionals-in-tests assert is_elem_empty( anon_ds[tt] ), f"({tt[0]:04X},{tt[1]:04x}):{anon_ds[tt].value} is not empty" diff --git a/tests/test_anonymization_without_dicom.py b/tests/test_anonymization_without_dicom.py index f9264b9..7420c39 100644 --- a/tests/test_anonymization_without_dicom.py +++ b/tests/test_anonymization_without_dicom.py @@ -27,7 +27,7 @@ def test_anonymization_without_dicom_file(): data = pydicom.Dataset() # Add each field into the dataset - for field in fields: + for field in fields: # sourcery skip: no-loop-in-tests data.add_new(field["id"], field["type"], field["value"]) anonymize_dataset(data) From 2f07e1770259339db13aa9e08b9ee8616b76162a Mon Sep 17 00:00:00 2001 From: Sanjay Joshi Date: Fri, 26 Jan 2024 01:14:41 -0500 Subject: [PATCH 07/17] support spellchecker --- cspell.json | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 cspell.json diff --git a/cspell.json b/cspell.json new file mode 100644 index 0000000..cc89f42 --- /dev/null +++ b/cspell.json @@ -0,0 +1,30 @@ +// cSpell Settings +{ + // Version of the setting file. Always 0.2 + "version": "0.2", + // language - current active spelling language + "language": "en", + // ignorepaths - list of glob patterns for files to ignore + "ignorePaths": [ + "ext", + "*.json", + "Pipfile.lock", + "**/tests" + ], + // words - list of words to be always considered correct + "words": [ + "dcmread", + "DICOM", + "dicomfields", + "multival", + "pydicom", + "simpledicomanonymizer", + "xgggg" + ] + // flagWords - list of words to be always considered incorrect + // This is useful for offensive words and common spelling errors. + // For example "hte" should be "the" + // "flagWords": [ + // "hte" + // ] +} From 893ce24fc5b7d7bf552ec351adbfe58881668ab1 Mon Sep 17 00:00:00 2001 From: Sanjay Joshi Date: Fri, 26 Jan 2024 01:20:57 -0500 Subject: [PATCH 08/17] typos and spellcheck --- cspell.json | 19 +++++++++++++++++++ dicomanonymizer/anonymizer.py | 2 +- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/cspell.json b/cspell.json index cc89f42..267e407 100644 --- a/cspell.json +++ b/cspell.json @@ -13,12 +13,31 @@ ], // words - list of words to be always considered correct "words": [ + "anonymization", + "argparse", + "bdist", "dcmread", + "DICM", "DICOM", + "dicomanonymizer", "dicomfields", + "Edern", + "Fiducials", + "Fontaine", + "Haumont", + "Kitware", + "Laurenn", "multival", + "pipenv", + "Pipfile", "pydicom", + "pytest", + "Radiopharmaceutical", + "setuptools", "simpledicomanonymizer", + "tqdm", + "venv", + "virtualenv", "xgggg" ] // flagWords - list of words to be always considered incorrect diff --git a/dicomanonymizer/anonymizer.py b/dicomanonymizer/anonymizer.py index 6a457fd..6b2d234 100644 --- a/dicomanonymizer/anonymizer.py +++ b/dicomanonymizer/anonymizer.py @@ -26,7 +26,7 @@ def anonymize(input_path: str, output_path: str, anonymization_actions: dict, de Read data from input path (folder or file) and launch the anonymization. :param input_path: Path to a folder or to a file. If set to a folder, - then cross all over subfiles and apply anonymization. + then cross all over sub-folders and apply anonymization. :param output_path: Path to a folder or to a file. :param anonymization_actions: List of actions that will be applied on tags. :param deletePrivateTags: Whether to delete private tags. From 9af7d7cf62f70c7eebd64642edb9be27d248bfae Mon Sep 17 00:00:00 2001 From: Sanjay Joshi Date: Fri, 26 Jan 2024 02:27:57 -0500 Subject: [PATCH 09/17] added issue number and link --- tests/test_anon.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/test_anon.py b/tests/test_anon.py index a9c867e..ddb9760 100644 --- a/tests/test_anon.py +++ b/tests/test_anon.py @@ -30,7 +30,7 @@ def get_all_failed(): # TODO: Investigate why these fail replacement test of anonymization # Error message: AttributeError: can't set attribute # when attempting to anonymize tag in dicomanonymizer\simpledicomanonymizer.py:108 - # See issue ??? + # See issue https://github.com/KitwareMedical/dicom-anonymizer/issues/63 replaced_failed = [ "reportsi.dcm", "reportsi_with_empty_number_tags.dcm", @@ -66,8 +66,7 @@ def test_deleted_tags_are_removed(orig_anon_dataset): len(tt) == 2 and tt in orig_ds ): # sourcery skip: merge-nested-ifs, no-conditionals-in-tests # TODO: Investigate why Date type are replaced instead of deleted - # See item # - + # See issue https://github.com/KitwareMedical/dicom-anonymizer/issues/56 if orig_ds[tt].VR != "DA": # sourcery skip: no-conditionals-in-tests assert ( tt not in anon_ds From ad6b05a11ee5c8048b1fa004578e526d3fbd0c5e Mon Sep 17 00:00:00 2001 From: Sanjay Joshi Date: Fri, 26 Jan 2024 02:39:35 -0500 Subject: [PATCH 10/17] behaviour is correct spelling --- cspell.json | 1 + 1 file changed, 1 insertion(+) diff --git a/cspell.json b/cspell.json index 267e407..3b687ee 100644 --- a/cspell.json +++ b/cspell.json @@ -16,6 +16,7 @@ "anonymization", "argparse", "bdist", + "behaviour", "dcmread", "DICM", "DICOM", From bc08b06253f4f4bd163091e246a92316cd0b8833 Mon Sep 17 00:00:00 2001 From: Sanjay Joshi Date: Thu, 22 Feb 2024 11:36:00 -0500 Subject: [PATCH 11/17] Fix for #63, can't set attribute --- dicomanonymizer/simpledicomanonymizer.py | 14 +++++++++++--- tests/test_anon.py | 13 ++----------- 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/dicomanonymizer/simpledicomanonymizer.py b/dicomanonymizer/simpledicomanonymizer.py index a57625c..cece7f6 100644 --- a/dicomanonymizer/simpledicomanonymizer.py +++ b/dicomanonymizer/simpledicomanonymizer.py @@ -105,7 +105,7 @@ def replace_element(element): See https://laurelbridge.com/pdf/Dicom-Anonymization-Conformance-Statement.pdf """ if element.VR in ('LO', 'LT', 'SH', 'PN', 'CS', 'ST', 'UT'): - element.value = 'Anonymized' + element.value = 'ANONYMIZED' # CS VR accepts only uppercase characters elif element.VR == 'UI': replace_element_UID(element) elif element.VR in ('DS', 'IS'): @@ -118,8 +118,16 @@ def replace_element(element): element.value = b'Anonymized' elif element.VR == 'SQ': for sub_dataset in element.value: - for sub_element in sub_dataset.elements(): - replace_element(sub_element) + for sub_element in sub_dataset.elements(): + if type(sub_element) == pydicom.dataelem.RawDataElement: + # RawDataElement is a NamedTuple, so cannot set its value attribute. + # Convert it to a DataElement, replace value, and set it back. + # Found in https://github.com/KitwareMedical/dicom-anonymizer/issues/63 + e2 = pydicom.dataelem.DataElement_from_raw(sub_element) + replace_element(e2) + sub_dataset.add(e2) + else: + replace_element(sub_element) else: raise NotImplementedError('Not anonymized. VR {} not yet implemented.'.format(element.VR)) diff --git a/tests/test_anon.py b/tests/test_anon.py index ddb9760..d9b95cc 100644 --- a/tests/test_anon.py +++ b/tests/test_anon.py @@ -27,16 +27,7 @@ def get_all_failed(): "OT-PAL-8-face.dcm", ] - # TODO: Investigate why these fail replacement test of anonymization - # Error message: AttributeError: can't set attribute - # when attempting to anonymize tag in dicomanonymizer\simpledicomanonymizer.py:108 - # See issue https://github.com/KitwareMedical/dicom-anonymizer/issues/63 - replaced_failed = [ - "reportsi.dcm", - "reportsi_with_empty_number_tags.dcm", - "test-SR.dcm", - ] - + replaced_failed = [] deleted_failed = [] emptied_failed = [] return dcmread_failed + replaced_failed + deleted_failed + emptied_failed @@ -82,7 +73,7 @@ def test_deleted_tags_are_removed(orig_anon_dataset): + dicomfields.X_Z_U_STAR_TAGS ) -empty_values = (0, "", "00010101", "000000.00") +empty_values = (0, "", "00010101", "000000.00", "ANONYMIZED") def is_elem_replaced(orig, anon) -> bool: From 0872799c108d899919e0b19aa2df96938cae31ad Mon Sep 17 00:00:00 2001 From: Sanjay Joshi Date: Thu, 22 Feb 2024 15:33:23 -0500 Subject: [PATCH 12/17] Suppressed filename copying warning --- tests/test_anon.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_anon.py b/tests/test_anon.py index d9b95cc..13b908f 100644 --- a/tests/test_anon.py +++ b/tests/test_anon.py @@ -43,6 +43,7 @@ def get_passing_files(): @pytest.fixture(scope="module", params=get_passing_files()) def orig_anon_dataset(request): orig_ds = dcmread(request.param) + orig_ds.filename = None # Non-None value causes warnings in copy(). Not needed for this testing anon_ds = orig_ds.copy() anonymize_dataset(anon_ds) return (orig_ds, anon_ds) From 428ccb304dfcc2698363b32ac3ae31e4406a8a0e Mon Sep 17 00:00:00 2001 From: Paul Choisel Date: Mon, 15 Jan 2024 13:31:24 +0100 Subject: [PATCH 13/17] ENH: Add dicom fields scrapping script --- .gitignore | 1 + pyproject.toml | 3 + scripts/README.md | 12 ++++ scripts/scrap_DICOM_fields.py | 111 ++++++++++++++++++++++++++++++++++ 4 files changed, 127 insertions(+) create mode 100644 scripts/README.md create mode 100644 scripts/scrap_DICOM_fields.py diff --git a/.gitignore b/.gitignore index ff4c4a7..7d27825 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,4 @@ build *.egg-info # Ignore Pipfile.lock, since different versions of python and OS produce different hashes Pipfile.lock +.python-version diff --git a/pyproject.toml b/pyproject.toml index 026195b..886926e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,6 +26,9 @@ dependencies = [ dev = [ "pytest", "setuptools", # Needed to load pydicom's test files + "bs4", + "fire", + "requests" ] [project.scripts] diff --git a/scripts/README.md b/scripts/README.md new file mode 100644 index 0000000..1a11a6e --- /dev/null +++ b/scripts/README.md @@ -0,0 +1,12 @@ +# Script folder + +This folder contains utility scripts for the maintenance of the package. + +## scrap_DICOM_fields.py + +This script downloads a web page and tries to scrap the DICOM fields and their anonymization command from it. + +1. Pull the repository: `git clone https://github.com/KitwareMedical/dicom-anonymizer.git` +1. Go in the repository: `cd dicom-anonymizer` +1. Install the dependencies: `pip install -e '.[dev]'` +1. Run the script: `python scripts/scrap_DICOM_fields.py` (Run it with `-h` to get a list of arguments) \ No newline at end of file diff --git a/scripts/scrap_DICOM_fields.py b/scripts/scrap_DICOM_fields.py new file mode 100644 index 0000000..66f742f --- /dev/null +++ b/scripts/scrap_DICOM_fields.py @@ -0,0 +1,111 @@ +""" +Download a web page and try to scrap the DICOM fields and their anonymization command from it. + +Written by Mohammad Khawar Zia +""" + +import fire +import requests + +from collections import defaultdict +from bs4 import BeautifulSoup + + +dicom_fields_header = """# Tags anonymized in DICOM standard +# Documentation for groups meaning can be found in default associated actions. +# https://dicom.nema.org/medical/dicom/current/output/chtml/part15/chapter_e.html + +""" + +dicom_fields_footer = """# Contains all previous tags into one array +ALL_TAGS = [] +ALL_TAGS.extend(D_TAGS) +ALL_TAGS.extend(Z_TAGS) +ALL_TAGS.extend(X_TAGS) +ALL_TAGS.extend(U_TAGS) +ALL_TAGS.extend(Z_D_TAGS) +ALL_TAGS.extend(X_Z_TAGS) +ALL_TAGS.extend(X_D_TAGS) +ALL_TAGS.extend(X_Z_D_TAGS) +ALL_TAGS.extend(X_Z_U_STAR_TAGS) +""" + + +def scrap_profiles(url): + page = requests.get(url) + soup = BeautifulSoup(page.content, "html.parser") + + headers = [th.text for th in soup.find(attrs={'id': 'table_E.1-1'}).parent.find('table').find('thead').find_all('strong')] + data = [] + + + for tr in soup.find(attrs={'id': 'table_E.1-1'}).parent.find('table').find('tbody').find_all('tr'): + tmp = {key: value.text.strip() for key, value in dict(zip(headers, tr.find_all('td'))).items() if key in ['Attribute Name', 'Tag', 'Basic Prof.']} + tmp2 = (tmp.get('Tag'), tmp.get('Attribute Name'), tmp.get('Basic Prof.')) + data.append(tmp2) + + data = sorted(data, key=lambda ele: (ele[2], ele[1])) + + + profiles = defaultdict(list) + fields_to_skip = { + 'Private Attributes', + } + for tag, name, profile in data: + if name in fields_to_skip: + continue + + if name == 'Curve Data': + new_tag = '(0x5000, 0x0000, 0xFF00, 0x0000)' + elif name == 'Overlay Comments': + new_tag = '(0x6000, 0x4000, 0xFF00, 0xFFFF)' + elif name == 'Overlay Data': + new_tag = '(0x6000, 0x3000, 0xFF00, 0xFFFF)' + else: + new_tag = list(tag) + new_tag.insert(6, '0x') + new_tag.insert(6, ' ') + new_tag.insert(1, '0x') + new_tag = ''.join(new_tag) + + name = name.replace('\u200b', '').replace('\n', '') + string = f'{new_tag}, # {name}' + profiles[profile].append(string) + + return profiles + + +def create_DICOM_fields(profiles): + dicom_fields = "" + for tag, tag_list, comment in ( + ('D', 'D_TAGS', '# Replaced tags'), + ('Z', 'Z_TAGS', "# Replaced with empty values (0, '', ...)"), + ('X', 'X_TAGS', '# Deleted tags'), + ('U', 'U_TAGS', '# Replace UID'), + + ('Z/D', 'Z_D_TAGS', '# Replace element according to the VR'), + ('X/Z', 'X_Z_TAGS', '# Set the value to empty according to the VR'), + ('X/D', 'X_D_TAGS', "# Replace element according to the VR"), + + ('X/Z/D', 'X_Z_D_TAGS', '# Replace element according to the VR'), + ('X/Z/U*', 'X_Z_U_STAR_TAGS', + '# Replace element with UI as VR, else replace according to VR with empty values'), + ): + dicom_fields += f'{comment}\n{tag_list} = [\n' + for profile in profiles.get(tag): + dicom_fields += f' {profile}\n' + dicom_fields += ']\n\n' + + return dicom_fields_header + dicom_fields + dicom_fields_footer + + +def main( + url="https://dicom.nema.org/medical/dicom/current/output/chtml/part15/chapter_e.html", + output_path='dicomanonymizer/dicomfields.py'): + profiles = scrap_profiles(url) + file_content = create_DICOM_fields(profiles=profiles) + with open(output_path, 'w') as file: + file.write(file_content) + +if __name__ == '__main__': + fire.Fire(main) \ No newline at end of file From 2aae45fdb1957579431d30a604c2a6b90e7abff2 Mon Sep 17 00:00:00 2001 From: Sanjay Joshi Date: Mon, 26 Feb 2024 13:01:07 -0500 Subject: [PATCH 14/17] isinstance instead of type --- dicomanonymizer/simpledicomanonymizer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dicomanonymizer/simpledicomanonymizer.py b/dicomanonymizer/simpledicomanonymizer.py index cece7f6..518570a 100644 --- a/dicomanonymizer/simpledicomanonymizer.py +++ b/dicomanonymizer/simpledicomanonymizer.py @@ -119,7 +119,7 @@ def replace_element(element): elif element.VR == 'SQ': for sub_dataset in element.value: for sub_element in sub_dataset.elements(): - if type(sub_element) == pydicom.dataelem.RawDataElement: + if isinstance(sub_element, pydicom.dataelem.RawDataElement): # RawDataElement is a NamedTuple, so cannot set its value attribute. # Convert it to a DataElement, replace value, and set it back. # Found in https://github.com/KitwareMedical/dicom-anonymizer/issues/63 From ae2646a9dec2e234ab2d9bfc9bcf518b2a2e249a Mon Sep 17 00:00:00 2001 From: Sanjay Joshi Date: Mon, 26 Feb 2024 13:03:11 -0500 Subject: [PATCH 15/17] formatting, spell-check, issue linking --- tests/test_anon.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/tests/test_anon.py b/tests/test_anon.py index 13b908f..b5e003a 100644 --- a/tests/test_anon.py +++ b/tests/test_anon.py @@ -16,7 +16,7 @@ warnings.filterwarnings("ignore") -def get_all_failed(): +def get_all_failed(): # sourcery skip: inline-immediately-returned-variable # The following files are intended to fail dcmread # No point including them for anonymization testing dcmread_failed = [ @@ -27,10 +27,7 @@ def get_all_failed(): "OT-PAL-8-face.dcm", ] - replaced_failed = [] - deleted_failed = [] - emptied_failed = [] - return dcmread_failed + replaced_failed + deleted_failed + emptied_failed + return dcmread_failed @lru_cache(maxsize=None) From 69890cfcdcfd8dbdf85cf686736d3f192ad89670 Mon Sep 17 00:00:00 2001 From: Sanjay Joshi Date: Thu, 22 Feb 2024 11:36:00 -0500 Subject: [PATCH 16/17] Fix for #63, can't set attribute --- dicomanonymizer/simpledicomanonymizer.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/dicomanonymizer/simpledicomanonymizer.py b/dicomanonymizer/simpledicomanonymizer.py index 518570a..83b6c5e 100644 --- a/dicomanonymizer/simpledicomanonymizer.py +++ b/dicomanonymizer/simpledicomanonymizer.py @@ -119,7 +119,11 @@ def replace_element(element): elif element.VR == 'SQ': for sub_dataset in element.value: for sub_element in sub_dataset.elements(): +<<<<<<< HEAD if isinstance(sub_element, pydicom.dataelem.RawDataElement): +======= + if type(sub_element) == pydicom.dataelem.RawDataElement: +>>>>>>> ee535a8 (Fix for #63, can't set attribute) # RawDataElement is a NamedTuple, so cannot set its value attribute. # Convert it to a DataElement, replace value, and set it back. # Found in https://github.com/KitwareMedical/dicom-anonymizer/issues/63 From 118921076e0015afcbc1226e2bf938ce7fc35b43 Mon Sep 17 00:00:00 2001 From: Sanjay Joshi Date: Mon, 26 Feb 2024 13:01:07 -0500 Subject: [PATCH 17/17] isinstance instead of type --- dicomanonymizer/simpledicomanonymizer.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/dicomanonymizer/simpledicomanonymizer.py b/dicomanonymizer/simpledicomanonymizer.py index 83b6c5e..518570a 100644 --- a/dicomanonymizer/simpledicomanonymizer.py +++ b/dicomanonymizer/simpledicomanonymizer.py @@ -119,11 +119,7 @@ def replace_element(element): elif element.VR == 'SQ': for sub_dataset in element.value: for sub_element in sub_dataset.elements(): -<<<<<<< HEAD if isinstance(sub_element, pydicom.dataelem.RawDataElement): -======= - if type(sub_element) == pydicom.dataelem.RawDataElement: ->>>>>>> ee535a8 (Fix for #63, can't set attribute) # RawDataElement is a NamedTuple, so cannot set its value attribute. # Convert it to a DataElement, replace value, and set it back. # Found in https://github.com/KitwareMedical/dicom-anonymizer/issues/63