Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
a222d63
Merge pull request #4 from KitwareMedical/master
smjoshiatglobus Jan 12, 2024
4b1ee1d
unit test fix
smjoshiatglobus Jan 26, 2024
9818970
pipenv setup
smjoshiatglobus Jan 26, 2024
387cfb4
exclude DA type for deletion test
smjoshiatglobus Jan 26, 2024
bdaad70
fixed replacement tests
smjoshiatglobus Jan 26, 2024
a4bbcc9
black formatting
smjoshiatglobus Jan 26, 2024
f02873f
black formatting
smjoshiatglobus Jan 26, 2024
683c4ba
black formatting
smjoshiatglobus Jan 26, 2024
02ce585
silenced sourcery warnings
smjoshiatglobus Jan 26, 2024
6ea6eaa
support spellchecker
smjoshiatglobus Jan 26, 2024
8d31c5c
typos and spellcheck
smjoshiatglobus Jan 26, 2024
399d136
added issue number and link
smjoshiatglobus Jan 26, 2024
9780514
behaviour is correct spelling
smjoshiatglobus Jan 26, 2024
ee535a8
Fix for #63, can't set attribute
smjoshiatglobus Feb 22, 2024
3efe54f
Suppressed filename copying warning
smjoshiatglobus Feb 22, 2024
843465f
Merge pull request #5 from KitwareMedical/master
smjoshiatglobus Feb 26, 2024
2ec2a8d
Merge branch 'master' into fix_unit_test
smjoshiatglobus Feb 26, 2024
f6f8421
isinstance instead of type
smjoshiatglobus Feb 26, 2024
2d53d34
Remove unused code
smjoshiatglobus Feb 26, 2024
f5bfe2c
black formatting
smjoshiatglobus Jan 26, 2024
da91c4f
silenced sourcery warnings
smjoshiatglobus Jan 26, 2024
2f07e17
support spellchecker
smjoshiatglobus Jan 26, 2024
893ce24
typos and spellcheck
smjoshiatglobus Jan 26, 2024
9af7d7c
added issue number and link
smjoshiatglobus Jan 26, 2024
ad6b05a
behaviour is correct spelling
smjoshiatglobus Jan 26, 2024
bc08b06
Fix for #63, can't set attribute
smjoshiatglobus Feb 22, 2024
0872799
Suppressed filename copying warning
smjoshiatglobus Feb 22, 2024
428ccb3
ENH: Add dicom fields scrapping script
pchoisel Jan 15, 2024
2aae45f
isinstance instead of type
smjoshiatglobus Feb 26, 2024
af2f02c
Remove unused code
smjoshiatglobus Feb 26, 2024
89e302d
Merge branch 'fix_unit_test' of github.com:globusmedical/dicom-anonym…
smjoshiatglobus Feb 28, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,7 @@ __pycache__
.vscode
build
*.egg-info
.python-version
.python-version
# Ignore Pipfile.lock, since different versions of Python and OS produce different hashes
Pipfile.lock
.python-version
15 changes: 15 additions & 0 deletions Pipfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@

# pipenv is useful for developing a local package in
# editable mode. See https://pypi.org/project/pipenv/

[[source]]
url = "https://pypi.org/simple"
verify_ssl = true
name = "pypi"

[packages]
dicom-anonymizer = {file = ".", editable = true}

[dev-packages]
pytest = "*"
setuptools = "*"
50 changes: 50 additions & 0 deletions cspell.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
// cSpell Settings
{
// Version of the setting file. Always 0.2
"version": "0.2",
// language - current active spelling language
"language": "en",
// ignorePaths - list of glob patterns for files to ignore
"ignorePaths": [
"ext",
"*.json",
"Pipfile.lock",
"**/tests"
],
// words - list of words to be always considered correct
"words": [
"anonymization",
"argparse",
"bdist",
"behaviour",
"dcmread",
"DICM",
"DICOM",
"dicomanonymizer",
"dicomfields",
"Edern",
"Fiducials",
"Fontaine",
"Haumont",
"Kitware",
"Laurenn",
"multival",
"pipenv",
"Pipfile",
"pydicom",
"pytest",
"Radiopharmaceutical",
"setuptools",
"simpledicomanonymizer",
"tqdm",
"venv",
"virtualenv",
"xgggg"
]
// flagWords - list of words to be always considered incorrect
// This is useful for offensive words and common spelling errors.
// For example "hte" should be "the"
// "flagWords": [
// "hte"
// ]
}
2 changes: 1 addition & 1 deletion dicomanonymizer/anonymizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def anonymize(input_path: str, output_path: str, anonymization_actions: dict, de
Read data from input path (folder or file) and launch the anonymization.

:param input_path: Path to a folder or to a file. If set to a folder,
then cross all over subfiles and apply anonymization.
then cross all over sub-folders and apply anonymization.
:param output_path: Path to a folder or to a file.
:param anonymization_actions: List of actions that will be applied on tags.
:param deletePrivateTags: Whether to delete private tags.
Expand Down
14 changes: 11 additions & 3 deletions dicomanonymizer/simpledicomanonymizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ def replace_element(element):
See https://laurelbridge.com/pdf/Dicom-Anonymization-Conformance-Statement.pdf
"""
if element.VR in ('LO', 'LT', 'SH', 'PN', 'CS', 'ST', 'UT'):
element.value = 'Anonymized'
element.value = 'ANONYMIZED' # CS VR accepts only uppercase characters
elif element.VR == 'UI':
replace_element_UID(element)
elif element.VR in ('DS', 'IS'):
Expand All @@ -118,8 +118,16 @@ def replace_element(element):
element.value = b'Anonymized'
elif element.VR == 'SQ':
for sub_dataset in element.value:
for sub_element in sub_dataset.elements():
replace_element(sub_element)
for sub_element in sub_dataset.elements():
if isinstance(sub_element, pydicom.dataelem.RawDataElement):
# RawDataElement is a NamedTuple, so cannot set its value attribute.
# Convert it to a DataElement, replace value, and set it back.
# Found in https://github.com/KitwareMedical/dicom-anonymizer/issues/63
e2 = pydicom.dataelem.DataElement_from_raw(sub_element)
replace_element(e2)
sub_dataset.add(e2)
else:
replace_element(sub_element)
else:
raise NotImplementedError('Not anonymized. VR {} not yet implemented.'.format(element.VR))

Expand Down
6 changes: 5 additions & 1 deletion examples/anonymize_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,17 @@
from pydicom.data import get_testdata_file
from pydicom import dcmread


def main():
original_ds = dcmread(get_testdata_file("CT_small.dcm"))
data_ds = original_ds.copy()
anonymize_dataset(data_ds, delete_private_tags=True) # Anonymization is done in-place
anonymize_dataset(
data_ds, delete_private_tags=True
) # Anonymization is done in-place
print("Examples of original -> anonymized values:")
for tt in ["PatientName", "PatientID", "StudyDate"]:
print(f" {tt}: '{original_ds[tt].value}' -> '{data_ds[tt].value}'")


if __name__ == "__main__":
main()
29 changes: 23 additions & 6 deletions examples/anonymize_extra_rules.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,22 @@
import argparse
from dicomanonymizer import ALL_TAGS, anonymize, keep


def main():
parser = argparse.ArgumentParser(add_help=True)
parser.add_argument('input', help='Path to the input dicom file or input directory which contains dicom files')
parser.add_argument('output', help='Path to the output dicom file or output directory which will contains dicom files')
parser.add_argument('--suffix', action='store', help='Suffix that will be added at the end of series description')
parser.add_argument(
"input",
help="Path to the input dicom file or input directory which contains dicom files",
)
parser.add_argument(
"output",
help="Path to the output dicom file or output directory which will contains dicom files",
)
parser.add_argument(
"--suffix",
action="store",
help="Suffix that will be added at the end of series description",
)
args = parser.parse_args()

input_dicom_path = args.input
Expand All @@ -16,7 +27,7 @@ def main():
def setup_series_description(dataset, tag):
element = dataset.get(tag)
if element is not None:
element.value = f'{element.value}-{args.suffix}'
element.value = f"{element.value}-{args.suffix}"

# The ALL_TAGS variable is defined in the file dicomfields.py
# the 'keep' method is already defined into the dicom-anonymizer
Expand All @@ -28,7 +39,13 @@ def setup_series_description(dataset, tag):
extra_anonymization_rules[(0x0008, 0x103E)] = setup_series_description

# Launch the anonymization
anonymize(input_dicom_path, output_dicom_path, extra_anonymization_rules, delete_private_tags=False)
anonymize(
input_dicom_path,
output_dicom_path,
extra_anonymization_rules,
delete_private_tags=False,
)


if __name__ == '__main__':
if __name__ == "__main__":
main()
137 changes: 71 additions & 66 deletions tests/test_anon.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
warnings.filterwarnings("ignore")


def get_all_failed():
def get_all_failed(): # sourcery skip: inline-immediately-returned-variable
# The following files are intended to fail dcmread
# No point including them for anonymization testing
dcmread_failed = [
Expand All @@ -27,50 +27,7 @@ def get_all_failed():
"OT-PAL-8-face.dcm",
]

# TODO: Investigate why these fail replacement test of anonymization
replaced_failed = [
"693_J2KI.dcm",
"JPEG-lossy.dcm",
"JPEG2000-embedded-sequence-delimiter.dcm",
"JPEG2000.dcm",
"JPGExtended.dcm",
"reportsi.dcm",
"reportsi_with_empty_number_tags.dcm",
"SC_rgb_gdcm_KY.dcm",
"SC_rgb_jpeg_lossy_gdcm.dcm",
"693_UNCI.dcm",
"JPEG-LL.dcm",
"JPEG2000_UNC.dcm",
"MR2_J2KI.dcm",
"MR2_J2KR.dcm",
"MR2_UNCI.dcm",
"RG1_J2KI.dcm",
"RG1_J2KR.dcm",
"RG1_UNCI.dcm",
"RG3_J2KI.dcm",
"RG3_J2KI.dcm",
"RG3_J2KR.dcm",
"RG3_UNCI.dcm",
"SC_rgb_gdcm2k_uncompressed.dcm",
"US1_J2KI.dcm",
"US1_J2KR.dcm",
"US1_UNCI.dcm",
"test-SR.dcm",
]

# TODO: Investigate why these fail deletion test of anonymization
deleted_failed = [
"color3d_jpeg_baseline.dcm",
"JPGLosslessP14SV1_1s_1f_8b.dcm",
"test-SR.dcm",
]

# TODO: Investigate why these fail emptying test of anonymization
emptied_failed = [
"JPGLosslessP14SV1_1s_1f_8b.dcm",
"test-SR.dcm",
]
return dcmread_failed + replaced_failed + deleted_failed + emptied_failed
return dcmread_failed


@lru_cache(maxsize=None)
Expand All @@ -83,6 +40,7 @@ def get_passing_files():
@pytest.fixture(scope="module", params=get_passing_files())
def orig_anon_dataset(request):
orig_ds = dcmread(request.param)
orig_ds.filename = None # Non-None value causes warnings in copy(). Not needed for this testing
anon_ds = orig_ds.copy()
anonymize_dataset(anon_ds)
return (orig_ds, anon_ds)
Expand All @@ -91,34 +49,81 @@ def orig_anon_dataset(request):
def test_deleted_tags_are_removed(orig_anon_dataset):
orig_ds, anon_ds = orig_anon_dataset
deleted_tags = dicomfields.X_TAGS
for tt in deleted_tags:
if len(tt) == 2 and tt in orig_ds:
assert tt not in anon_ds

for tt in deleted_tags: # sourcery skip: no-loop-in-tests
if (
len(tt) == 2 and tt in orig_ds
): # sourcery skip: merge-nested-ifs, no-conditionals-in-tests
Comment on lines +54 to +56
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This formatting feels weird; is it because of Flake8?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This comment suppresses a couple of errors from the Sourcery plugin: https://sourcery.ai/

# TODO: Investigate why Date (DA) type elements are replaced instead of deleted
# See issue https://github.com/KitwareMedical/dicom-anonymizer/issues/56
if orig_ds[tt].VR != "DA": # sourcery skip: no-conditionals-in-tests
assert (
tt not in anon_ds
), f"({tt[0]:04X},{tt[1]:04x}):{orig_ds[tt].value}->{anon_ds[tt].value}"

def test_changed_tags_are_replaced(orig_anon_dataset):
changed_tags = (
dicomfields.U_TAGS
+ dicomfields.D_TAGS
+ dicomfields.Z_D_TAGS
+ dicomfields.X_D_TAGS
+ dicomfields.X_Z_D_TAGS
+ dicomfields.X_Z_U_STAR_TAGS
)

changed_tags = (
dicomfields.U_TAGS
+ dicomfields.D_TAGS
+ dicomfields.Z_D_TAGS
+ dicomfields.X_D_TAGS
+ dicomfields.X_Z_D_TAGS
+ dicomfields.X_Z_U_STAR_TAGS
)

empty_values = (0, "", "00010101", "000000.00", "ANONYMIZED")


def is_elem_replaced(orig, anon) -> bool:
    """Tell whether ``anon`` holds a replaced value compared to ``orig``.

    Non-sequence elements: returns True when the original value is one of
    the module-level ``empty_values`` (nothing to replace); otherwise
    returns the result of ``orig.value != anon.value``.

    Sequence elements (VR ``"SQ"``): the items of both sequences are paired
    with ``zip`` (so surplus items in the longer sequence are not visited).
    For every tag of the module-level ``changed_tags`` that is present in
    the original item with a non-zero-length value, the tag must exist in
    the anonymized item and its element must itself be replaced; both
    conditions are enforced with ``assert``. Returns True when no assertion
    fails.

    :param orig: Element taken from the original dataset.
    :param anon: Matching element taken from the anonymized dataset.
    :raises AssertionError: When a nested tag is missing or not replaced.
    """
    if orig.VR != "SQ":
        if orig.value in empty_values:
            return True
        return orig.value != anon.value

    for orig_item, anon_item in zip(orig.value, anon.value):
        for tag in changed_tags:
            # Skip tags that are absent or carry a zero-length value.
            if tag not in orig_item or len(orig_item[tag].value) == 0:
                continue
            # NOTE(review): the group is printed with 04X and the element
            # with 04x, so the hex case differs inside one tag - confirm
            # this is intended.
            assert (
                tag in anon_item
            ), f"({tag[0]:04X},{tag[1]:04x}):{orig_item[tag].value}->missing!"
            assert is_elem_replaced(
                orig_item[tag], anon_item[tag]
            ), f"({tag[0]:04X},{tag[1]:04x}):{orig_item[tag].value} not replaced"
    return True


def test_changed_tags_are_replaced(orig_anon_dataset):
orig_ds, anon_ds = orig_anon_dataset

for tt in changed_tags:
if tt in orig_ds:
assert anon_ds[tt] != orig_ds[tt]
for tt in changed_tags: # sourcery skip: no-loop-in-tests
if tt in orig_ds: # sourcery skip: no-conditionals-in-tests
assert (
tt in anon_ds
), f"({tt[0]:04X},{tt[1]:04x}):{orig_ds[tt].value}->missing!"
assert is_elem_replaced(
orig_ds[tt], anon_ds[tt]
), f"({tt[0]:04X},{tt[1]:04x}):{orig_ds[tt].value} not replaced"


def test_empty_tags_are_emptied(orig_anon_dataset):
empty_values = (0, "", "00010101", "000000.00")
empty_tags = dicomfields.Z_TAGS + dicomfields.X_Z_TAGS
empty_tags = dicomfields.Z_TAGS + dicomfields.X_Z_TAGS


def is_elem_empty(elem) -> bool:
    """Tell whether ``elem`` carries one of the accepted "empty" values.

    Non-sequence elements: returns whether ``elem.value`` is contained in
    the module-level ``empty_values``.

    Sequence elements (VR ``"SQ"``): for each item, every tag of the
    module-level ``empty_tags`` that is present with a non-zero-length
    value is checked recursively, enforced with ``assert``. Returns True
    when no assertion fails.

    :param elem: Element taken from the anonymized dataset.
    :raises AssertionError: When a nested element is not empty.
    """
    if elem.VR != "SQ":
        return elem.value in empty_values

    for item in elem.value:
        for tag in empty_tags:
            # Skip tags that are absent or carry a zero-length value.
            if tag not in item or len(item[tag].value) == 0:
                continue
            assert is_elem_empty(
                item[tag]
            ), f"({tag[0]:04X},{tag[1]:04x}):{item[tag].value} is not empty"
    return True


def test_empty_tags_are_emptied(orig_anon_dataset):
orig_ds, anon_ds = orig_anon_dataset

for tt in empty_tags:
if tt in orig_ds:
assert anon_ds[tt].value in empty_values
for tt in empty_tags: # sourcery skip: no-loop-in-tests
if (
tt in orig_ds and len(orig_ds[tt].value) > 0
): # sourcery skip: no-conditionals-in-tests
assert is_elem_empty(
anon_ds[tt]
), f"({tt[0]:04X},{tt[1]:04x}):{anon_ds[tt].value} is not empty"
Loading