diff --git a/.gitignore b/.gitignore index 5fe5836..7d27825 100644 --- a/.gitignore +++ b/.gitignore @@ -4,4 +4,6 @@ __pycache__ .vscode build *.egg-info -.python-version \ No newline at end of file +# Ignore Pipfile.lock, since different versions of python and OS produce different hashes +Pipfile.lock +.python-version diff --git a/Pipfile b/Pipfile new file mode 100644 index 0000000..4a1d299 --- /dev/null +++ b/Pipfile @@ -0,0 +1,15 @@ + +# pipenv is useful for developing local package in +# editable mode. See https://pypi.org/project/pipenv/ + +[[source]] +url = "https://pypi.org/simple" +verify_ssl = true +name = "pypi" + +[packages] +dicom-anonymizer = {file = ".", editable = true} + +[dev-packages] +pytest = "*" +setuptools = "*" diff --git a/cspell.json b/cspell.json new file mode 100644 index 0000000..3b687ee --- /dev/null +++ b/cspell.json @@ -0,0 +1,50 @@ +// cSpell Settings +{ + // Version of the setting file. Always 0.2 + "version": "0.2", + // language - current active spelling language + "language": "en", + // ignorepaths - list of glob patterns for files to ignore + "ignorePaths": [ + "ext", + "*.json", + "Pipfile.lock", + "**/tests" + ], + // words - list of words to be always considered correct + "words": [ + "anonymization", + "argparse", + "bdist", + "behaviour", + "dcmread", + "DICM", + "DICOM", + "dicomanonymizer", + "dicomfields", + "Edern", + "Fiducials", + "Fontaine", + "Haumont", + "Kitware", + "Laurenn", + "multival", + "pipenv", + "Pipfile", + "pydicom", + "pytest", + "Radiopharmaceutical", + "setuptools", + "simpledicomanonymizer", + "tqdm", + "venv", + "virtualenv", + "xgggg" + ] + // flagWords - list of words to be always considered incorrect + // This is useful for offensive words and common spelling errors. + // For example "hte" should be "the" + // "flagWords": [ + // "hte" + // ] +} diff --git a/dicomanonymizer/anonymizer.py b/dicomanonymizer/anonymizer.py index 6a457fd..6b2d234 100644 --- a/dicomanonymizer/anonymizer.py +++ b/dicomanonymizer/anonymizer.py @@ -26,7 +26,7 @@ def anonymize(input_path: str, output_path: str, anonymization_actions: dict, de Read data from input path (folder or file) and launch the anonymization. :param input_path: Path to a folder or to a file. If set to a folder, - then cross all over subfiles and apply anonymization. + then cross all over sub-folders and apply anonymization. :param output_path: Path to a folder or to a file. :param anonymization_actions: List of actions that will be applied on tags. :param deletePrivateTags: Whether to delete private tags. diff --git a/dicomanonymizer/simpledicomanonymizer.py b/dicomanonymizer/simpledicomanonymizer.py index a57625c..518570a 100644 --- a/dicomanonymizer/simpledicomanonymizer.py +++ b/dicomanonymizer/simpledicomanonymizer.py @@ -105,7 +105,7 @@ def replace_element(element): See https://laurelbridge.com/pdf/Dicom-Anonymization-Conformance-Statement.pdf """ if element.VR in ('LO', 'LT', 'SH', 'PN', 'CS', 'ST', 'UT'): - element.value = 'Anonymized' + element.value = 'ANONYMIZED' # CS VR accepts only uppercase characters elif element.VR == 'UI': replace_element_UID(element) elif element.VR in ('DS', 'IS'): @@ -118,8 +118,16 @@ def replace_element(element): element.value = b'Anonymized' elif element.VR == 'SQ': for sub_dataset in element.value: - for sub_element in sub_dataset.elements(): - replace_element(sub_element) + for sub_element in sub_dataset.elements(): + if isinstance(sub_element, pydicom.dataelem.RawDataElement): + # RawDataElement is a NamedTuple, so cannot set its value attribute. + # Convert it to a DataElement, replace value, and set it back. + # Found in https://github.com/KitwareMedical/dicom-anonymizer/issues/63 + e2 = pydicom.dataelem.DataElement_from_raw(sub_element) + replace_element(e2) + sub_dataset.add(e2) + else: + replace_element(sub_element) else: raise NotImplementedError('Not anonymized. VR {} not yet implemented.'.format(element.VR)) diff --git a/examples/anonymize_dataset.py b/examples/anonymize_dataset.py index 4c748ce..febe396 100644 --- a/examples/anonymize_dataset.py +++ b/examples/anonymize_dataset.py @@ -2,13 +2,17 @@ from pydicom.data import get_testdata_file from pydicom import dcmread + def main(): original_ds = dcmread(get_testdata_file("CT_small.dcm")) data_ds = original_ds.copy() - anonymize_dataset(data_ds, delete_private_tags=True) # Anonymization is done in-place + anonymize_dataset( + data_ds, delete_private_tags=True + ) # Anonymization is done in-place print("Examples of original -> anonymized values:") for tt in ["PatientName", "PatientID", "StudyDate"]: print(f" {tt}: '{original_ds[tt].value}' -> '{data_ds[tt].value}'") + if __name__ == "__main__": main() diff --git a/examples/anonymize_extra_rules.py b/examples/anonymize_extra_rules.py index 6ad1b55..13cdb74 100644 --- a/examples/anonymize_extra_rules.py +++ b/examples/anonymize_extra_rules.py @@ -1,11 +1,22 @@ import argparse from dicomanonymizer import ALL_TAGS, anonymize, keep + def main(): parser = argparse.ArgumentParser(add_help=True) - parser.add_argument('input', help='Path to the input dicom file or input directory which contains dicom files') - parser.add_argument('output', help='Path to the output dicom file or output directory which will contains dicom files') - parser.add_argument('--suffix', action='store', help='Suffix that will be added at the end of series description') + parser.add_argument( + "input", + help="Path to the input dicom file or input directory which contains dicom files", + ) + parser.add_argument( + "output", + help="Path to the output dicom file or output directory which will contains dicom files", + ) + parser.add_argument( + "--suffix", + action="store", + help="Suffix that will be added at the end of series description", + ) args = parser.parse_args() input_dicom_path = args.input @@ -16,7 +27,7 @@ def main(): def setup_series_description(dataset, tag): element = dataset.get(tag) if element is not None: - element.value = f'{element.value}-{args.suffix}' + element.value = f"{element.value}-{args.suffix}" # ALL_TAGS variable is defined on file dicomfields.py # the 'keep' method is already defined into the dicom-anonymizer @@ -28,7 +39,13 @@ def setup_series_description(dataset, tag): extra_anonymization_rules[(0x0008, 0x103E)] = setup_series_description # Launch the anonymization - anonymize(input_dicom_path, output_dicom_path, extra_anonymization_rules, delete_private_tags=False) + anonymize( + input_dicom_path, + output_dicom_path, + extra_anonymization_rules, + delete_private_tags=False, + ) + -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/tests/test_anon.py b/tests/test_anon.py index 6560d78..b5e003a 100644 --- a/tests/test_anon.py +++ b/tests/test_anon.py @@ -16,7 +16,7 @@ warnings.filterwarnings("ignore") -def get_all_failed(): +def get_all_failed(): # sourcery skip: inline-immediately-returned-variable # The following files are intended to fail dcmread # No point including them for anonymization testing dcmread_failed = [ @@ -27,50 +27,7 @@ def get_all_failed(): "OT-PAL-8-face.dcm", ] - # TODO: Investigate why these fail replacement test of anonymization - replaced_failed = [ - "693_J2KI.dcm", - "JPEG-lossy.dcm", - "JPEG2000-embedded-sequence-delimiter.dcm", - "JPEG2000.dcm", - "JPGExtended.dcm", - "reportsi.dcm", - "reportsi_with_empty_number_tags.dcm", - "SC_rgb_gdcm_KY.dcm", - "SC_rgb_jpeg_lossy_gdcm.dcm", - "693_UNCI.dcm", - "JPEG-LL.dcm", - "JPEG2000_UNC.dcm", - "MR2_J2KI.dcm", - "MR2_J2KR.dcm", - "MR2_UNCI.dcm", - "RG1_J2KI.dcm", - "RG1_J2KR.dcm", - "RG1_UNCI.dcm", - "RG3_J2KI.dcm", - "RG3_J2KI.dcm", - "RG3_J2KR.dcm", - "RG3_UNCI.dcm", - "SC_rgb_gdcm2k_uncompressed.dcm", - "US1_J2KI.dcm", - "US1_J2KR.dcm", - "US1_UNCI.dcm", - "test-SR.dcm", - ] - - # TODO: Investigate why these fail deletion test of anonymization - deleted_failed = [ - "color3d_jpeg_baseline.dcm", - "JPGLosslessP14SV1_1s_1f_8b.dcm", - "test-SR.dcm", - ] - - # TODO: Investigate why these fail emptying test of anonymization - emptied_failed = [ - "JPGLosslessP14SV1_1s_1f_8b.dcm", - "test-SR.dcm", - ] - return dcmread_failed + replaced_failed + deleted_failed + emptied_failed + return dcmread_failed @lru_cache(maxsize=None) @@ -83,6 +40,7 @@ def get_passing_files(): @pytest.fixture(scope="module", params=get_passing_files()) def orig_anon_dataset(request): orig_ds = dcmread(request.param) + orig_ds.filename = None # Non-None value causes warnings in copy(). Not needed for this testing anon_ds = orig_ds.copy() anonymize_dataset(anon_ds) return (orig_ds, anon_ds) @@ -91,34 +49,81 @@ def orig_anon_dataset(request): def test_deleted_tags_are_removed(orig_anon_dataset): orig_ds, anon_ds = orig_anon_dataset deleted_tags = dicomfields.X_TAGS - for tt in deleted_tags: - if len(tt) == 2 and tt in orig_ds: - assert tt not in anon_ds + for tt in deleted_tags: # sourcery skip: no-loop-in-tests + if ( + len(tt) == 2 and tt in orig_ds + ): # sourcery skip: merge-nested-ifs, no-conditionals-in-tests + # TODO: Investigate why Date type are replaced instead of deleted + # See issue https://github.com/KitwareMedical/dicom-anonymizer/issues/56 + if orig_ds[tt].VR != "DA": # sourcery skip: no-conditionals-in-tests + assert ( + tt not in anon_ds + ), f"({tt[0]:04X},{tt[1]:04x}):{orig_ds[tt].value}->{anon_ds[tt].value}" -def test_changed_tags_are_replaced(orig_anon_dataset): - changed_tags = ( - dicomfields.U_TAGS - + dicomfields.D_TAGS - + dicomfields.Z_D_TAGS - + dicomfields.X_D_TAGS - + dicomfields.X_Z_D_TAGS - + dicomfields.X_Z_U_STAR_TAGS - ) +changed_tags = ( + dicomfields.U_TAGS + + dicomfields.D_TAGS + + dicomfields.Z_D_TAGS + + dicomfields.X_D_TAGS + + dicomfields.X_Z_D_TAGS + + dicomfields.X_Z_U_STAR_TAGS +) + +empty_values = (0, "", "00010101", "000000.00", "ANONYMIZED") + + +def is_elem_replaced(orig, anon) -> bool: + if orig.VR == "SQ": + for x, y in zip(orig.value, anon.value): + for tt in changed_tags: + if tt in x and len(x[tt].value) > 0: + assert tt in y, f"({tt[0]:04X},{tt[1]:04x}):{x[tt].value}->missing!" + assert is_elem_replaced( + x[tt], y[tt] + ), f"({tt[0]:04X},{tt[1]:04x}):{x[tt].value} not replaced" + return True + + return orig.value != anon.value if orig.value not in empty_values else True + + +def test_changed_tags_are_replaced(orig_anon_dataset): orig_ds, anon_ds = orig_anon_dataset - for tt in changed_tags: - if tt in orig_ds: - assert anon_ds[tt] != orig_ds[tt] + for tt in changed_tags: # sourcery skip: no-loop-in-tests + if tt in orig_ds: # sourcery skip: no-conditionals-in-tests + assert ( + tt in anon_ds + ), f"({tt[0]:04X},{tt[1]:04x}):{orig_ds[tt].value}->missing!" + assert is_elem_replaced( + orig_ds[tt], anon_ds[tt] + ), f"({tt[0]:04X},{tt[1]:04x}):{orig_ds[tt].value} not replaced" -def test_empty_tags_are_emptied(orig_anon_dataset): - empty_values = (0, "", "00010101", "000000.00") - empty_tags = dicomfields.Z_TAGS + dicomfields.X_Z_TAGS +empty_tags = dicomfields.Z_TAGS + dicomfields.X_Z_TAGS + +def is_elem_empty(elem) -> bool: + if elem.VR == "SQ": + for x in elem.value: + for tt in empty_tags: + if tt in x and len(x[tt].value) > 0: + assert is_elem_empty( + x[tt] + ), f"({tt[0]:04X},{tt[1]:04x}):{x[tt].value} is not empty" + return True + + return elem.value in empty_values + + +def test_empty_tags_are_emptied(orig_anon_dataset): orig_ds, anon_ds = orig_anon_dataset - for tt in empty_tags: - if tt in orig_ds: - assert anon_ds[tt].value in empty_values + for tt in empty_tags: # sourcery skip: no-loop-in-tests + if ( + tt in orig_ds and len(orig_ds[tt].value) > 0 + ): # sourcery skip: no-conditionals-in-tests + assert is_elem_empty( + anon_ds[tt] + ), f"({tt[0]:04X},{tt[1]:04x}):{anon_ds[tt].value} is not empty" diff --git a/tests/test_anonymization_without_dicom.py b/tests/test_anonymization_without_dicom.py index 2c5a0b5..7420c39 100644 --- a/tests/test_anonymization_without_dicom.py +++ b/tests/test_anonymization_without_dicom.py @@ -2,31 +2,32 @@ from dicomanonymizer import anonymize_dataset + def test_anonymization_without_dicom_file(): # Create a list of tags object that should contains id, type and value fields = [ - { # Replaced by Anonymized - 'id': (0x0040, 0xA123), - 'type': 'LO', - 'value': 'Annie de la Fontaine', + { # Replaced by Anonymized + "id": (0x0040, 0xA123), + "type": "LO", + "value": "Annie de la Fontaine", + }, + { # Replaced with empty value + "id": (0x0008, 0x0050), + "type": "TM", + "value": "bar", }, - { # Replaced with empty value - 'id': (0x0008, 0x0050), - 'type': 'TM', - 'value': 'bar', + { # Deleted + "id": (0x0018, 0x4000), + "type": "VR", + "value": "foo", }, - { # Deleted - 'id': (0x0018, 0x4000), - 'type': 'VR', - 'value': 'foo', - } ] # Create a readable dataset for pydicom data = pydicom.Dataset() # Add each field into the dataset - for field in fields: - data.add_new(field['id'], field['type'], field['value']) + for field in fields: # sourcery skip: no-loop-in-tests + data.add_new(field["id"], field["type"], field["value"]) anonymize_dataset(data)