Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,6 @@ __pycache__
.vscode
build
*.egg-info
.python-version
# Ignore Pipfile.lock, since different versions of python and OS produce different hashes
Pipfile.lock
.python-version
15 changes: 15 additions & 0 deletions Pipfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@

# pipenv is useful for developing local package in
# editable mode. See https://pypi.org/project/pipenv/

[[source]]
url = "https://pypi.org/simple"
verify_ssl = true
name = "pypi"

[packages]
dicom-anonymizer = {file = ".", editable = true}

[dev-packages]
pytest = "*"
setuptools = "*"
50 changes: 50 additions & 0 deletions cspell.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
// cSpell Settings
{
// Version of the setting file. Always 0.2
"version": "0.2",
// language - current active spelling language
"language": "en",
// ignorepaths - list of glob patterns for files to ignore
"ignorePaths": [
"ext",
"*.json",
"Pipfile.lock",
"**/tests"
],
// words - list of words to be always considered correct
"words": [
"anonymization",
"argparse",
"bdist",
"behaviour",
"dcmread",
"DICM",
"DICOM",
"dicomanonymizer",
"dicomfields",
"Edern",
"Fiducials",
"Fontaine",
"Haumont",
"Kitware",
"Laurenn",
"multival",
"pipenv",
"Pipfile",
"pydicom",
"pytest",
"Radiopharmaceutical",
"setuptools",
"simpledicomanonymizer",
"tqdm",
"venv",
"virtualenv",
"xgggg"
]
// flagWords - list of words to be always considered incorrect
// This is useful for offensive words and common spelling errors.
// For example "hte" should be "the"
// "flagWords": [
// "hte"
// ]
}
2 changes: 1 addition & 1 deletion dicomanonymizer/anonymizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def anonymize(input_path: str, output_path: str, anonymization_actions: dict, de
Read data from input path (folder or file) and launch the anonymization.

:param input_path: Path to a folder or to a file. If set to a folder,
then cross all over subfiles and apply anonymization.
then cross all over sub-folders and apply anonymization.
:param output_path: Path to a folder or to a file.
:param anonymization_actions: List of actions that will be applied on tags.
:param deletePrivateTags: Whether to delete private tags.
Expand Down
14 changes: 11 additions & 3 deletions dicomanonymizer/simpledicomanonymizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ def replace_element(element):
See https://laurelbridge.com/pdf/Dicom-Anonymization-Conformance-Statement.pdf
"""
if element.VR in ('LO', 'LT', 'SH', 'PN', 'CS', 'ST', 'UT'):
element.value = 'Anonymized'
element.value = 'ANONYMIZED' # CS VR accepts only uppercase characters
elif element.VR == 'UI':
replace_element_UID(element)
elif element.VR in ('DS', 'IS'):
Expand All @@ -118,8 +118,16 @@ def replace_element(element):
element.value = b'Anonymized'
elif element.VR == 'SQ':
for sub_dataset in element.value:
for sub_element in sub_dataset.elements():
replace_element(sub_element)
for sub_element in sub_dataset.elements():
if isinstance(sub_element, pydicom.dataelem.RawDataElement):
# RawDataElement is a NamedTuple, so cannot set its value attribute.
# Convert it to a DataElement, replace value, and set it back.
# Found in https://github.com/KitwareMedical/dicom-anonymizer/issues/63
e2 = pydicom.dataelem.DataElement_from_raw(sub_element)
replace_element(e2)
sub_dataset.add(e2)
else:
replace_element(sub_element)
else:
raise NotImplementedError('Not anonymized. VR {} not yet implemented.'.format(element.VR))

Expand Down
6 changes: 5 additions & 1 deletion examples/anonymize_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,17 @@
from pydicom.data import get_testdata_file
from pydicom import dcmread


def main():
original_ds = dcmread(get_testdata_file("CT_small.dcm"))
data_ds = original_ds.copy()
anonymize_dataset(data_ds, delete_private_tags=True) # Anonymization is done in-place
anonymize_dataset(
data_ds, delete_private_tags=True
) # Anonymization is done in-place
print("Examples of original -> anonymized values:")
for tt in ["PatientName", "PatientID", "StudyDate"]:
print(f" {tt}: '{original_ds[tt].value}' -> '{data_ds[tt].value}'")


if __name__ == "__main__":
main()
29 changes: 23 additions & 6 deletions examples/anonymize_extra_rules.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,22 @@
import argparse
from dicomanonymizer import ALL_TAGS, anonymize, keep


def main():
parser = argparse.ArgumentParser(add_help=True)
parser.add_argument('input', help='Path to the input dicom file or input directory which contains dicom files')
parser.add_argument('output', help='Path to the output dicom file or output directory which will contains dicom files')
parser.add_argument('--suffix', action='store', help='Suffix that will be added at the end of series description')
parser.add_argument(
"input",
help="Path to the input dicom file or input directory which contains dicom files",
)
parser.add_argument(
"output",
help="Path to the output dicom file or output directory which will contains dicom files",
)
parser.add_argument(
"--suffix",
action="store",
help="Suffix that will be added at the end of series description",
)
args = parser.parse_args()

input_dicom_path = args.input
Expand All @@ -16,7 +27,7 @@ def main():
def setup_series_description(dataset, tag):
element = dataset.get(tag)
if element is not None:
element.value = f'{element.value}-{args.suffix}'
element.value = f"{element.value}-{args.suffix}"

# ALL_TAGS variable is defined on file dicomfields.py
# the 'keep' method is already defined into the dicom-anonymizer
Expand All @@ -28,7 +39,13 @@ def setup_series_description(dataset, tag):
extra_anonymization_rules[(0x0008, 0x103E)] = setup_series_description

# Launch the anonymization
anonymize(input_dicom_path, output_dicom_path, extra_anonymization_rules, delete_private_tags=False)
anonymize(
input_dicom_path,
output_dicom_path,
extra_anonymization_rules,
delete_private_tags=False,
)


if __name__ == '__main__':
if __name__ == "__main__":
main()
137 changes: 71 additions & 66 deletions tests/test_anon.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
warnings.filterwarnings("ignore")


def get_all_failed():
def get_all_failed(): # sourcery skip: inline-immediately-returned-variable
# The following files are intended to fail dcmread
# No point including them for anonymization testing
dcmread_failed = [
Expand All @@ -27,50 +27,7 @@ def get_all_failed():
"OT-PAL-8-face.dcm",
]

# TODO: Investigate why these fail replacement test of anonymization
replaced_failed = [
"693_J2KI.dcm",
"JPEG-lossy.dcm",
"JPEG2000-embedded-sequence-delimiter.dcm",
"JPEG2000.dcm",
"JPGExtended.dcm",
"reportsi.dcm",
"reportsi_with_empty_number_tags.dcm",
"SC_rgb_gdcm_KY.dcm",
"SC_rgb_jpeg_lossy_gdcm.dcm",
"693_UNCI.dcm",
"JPEG-LL.dcm",
"JPEG2000_UNC.dcm",
"MR2_J2KI.dcm",
"MR2_J2KR.dcm",
"MR2_UNCI.dcm",
"RG1_J2KI.dcm",
"RG1_J2KR.dcm",
"RG1_UNCI.dcm",
"RG3_J2KI.dcm",
"RG3_J2KI.dcm",
"RG3_J2KR.dcm",
"RG3_UNCI.dcm",
"SC_rgb_gdcm2k_uncompressed.dcm",
"US1_J2KI.dcm",
"US1_J2KR.dcm",
"US1_UNCI.dcm",
"test-SR.dcm",
]

# TODO: Investigate why these fail deletion test of anonymization
deleted_failed = [
"color3d_jpeg_baseline.dcm",
"JPGLosslessP14SV1_1s_1f_8b.dcm",
"test-SR.dcm",
]

# TODO: Investigate why these fail emptying test of anonymization
emptied_failed = [
"JPGLosslessP14SV1_1s_1f_8b.dcm",
"test-SR.dcm",
]
return dcmread_failed + replaced_failed + deleted_failed + emptied_failed
return dcmread_failed


@lru_cache(maxsize=None)
Expand All @@ -83,6 +40,7 @@ def get_passing_files():
@pytest.fixture(scope="module", params=get_passing_files())
def orig_anon_dataset(request):
orig_ds = dcmread(request.param)
orig_ds.filename = None # Non-None value causes warnings in copy(). Not needed for this testing
anon_ds = orig_ds.copy()
anonymize_dataset(anon_ds)
return (orig_ds, anon_ds)
Expand All @@ -91,34 +49,81 @@ def orig_anon_dataset(request):
def test_deleted_tags_are_removed(orig_anon_dataset):
orig_ds, anon_ds = orig_anon_dataset
deleted_tags = dicomfields.X_TAGS
for tt in deleted_tags:
if len(tt) == 2 and tt in orig_ds:
assert tt not in anon_ds

for tt in deleted_tags: # sourcery skip: no-loop-in-tests
if (
len(tt) == 2 and tt in orig_ds
): # sourcery skip: merge-nested-ifs, no-conditionals-in-tests
# TODO: Investigate why Date type are replaced instead of deleted
# See issue https://github.com/KitwareMedical/dicom-anonymizer/issues/56
if orig_ds[tt].VR != "DA": # sourcery skip: no-conditionals-in-tests
assert (
tt not in anon_ds
), f"({tt[0]:04X},{tt[1]:04x}):{orig_ds[tt].value}->{anon_ds[tt].value}"

def test_changed_tags_are_replaced(orig_anon_dataset):
changed_tags = (
dicomfields.U_TAGS
+ dicomfields.D_TAGS
+ dicomfields.Z_D_TAGS
+ dicomfields.X_D_TAGS
+ dicomfields.X_Z_D_TAGS
+ dicomfields.X_Z_U_STAR_TAGS
)

changed_tags = (
dicomfields.U_TAGS
+ dicomfields.D_TAGS
+ dicomfields.Z_D_TAGS
+ dicomfields.X_D_TAGS
+ dicomfields.X_Z_D_TAGS
+ dicomfields.X_Z_U_STAR_TAGS
)

empty_values = (0, "", "00010101", "000000.00", "ANONYMIZED")


def is_elem_replaced(orig, anon) -> bool:
if orig.VR == "SQ":
for x, y in zip(orig.value, anon.value):
for tt in changed_tags:
if tt in x and len(x[tt].value) > 0:
assert tt in y, f"({tt[0]:04X},{tt[1]:04x}):{x[tt].value}->missing!"
assert is_elem_replaced(
x[tt], y[tt]
), f"({tt[0]:04X},{tt[1]:04x}):{x[tt].value} not replaced"
return True

return orig.value != anon.value if orig.value not in empty_values else True


def test_changed_tags_are_replaced(orig_anon_dataset):
orig_ds, anon_ds = orig_anon_dataset

for tt in changed_tags:
if tt in orig_ds:
assert anon_ds[tt] != orig_ds[tt]
for tt in changed_tags: # sourcery skip: no-loop-in-tests
if tt in orig_ds: # sourcery skip: no-conditionals-in-tests
assert (
tt in anon_ds
), f"({tt[0]:04X},{tt[1]:04x}):{orig_ds[tt].value}->missing!"
assert is_elem_replaced(
orig_ds[tt], anon_ds[tt]
), f"({tt[0]:04X},{tt[1]:04x}):{orig_ds[tt].value} not replaced"


def test_empty_tags_are_emptied(orig_anon_dataset):
empty_values = (0, "", "00010101", "000000.00")
empty_tags = dicomfields.Z_TAGS + dicomfields.X_Z_TAGS
empty_tags = dicomfields.Z_TAGS + dicomfields.X_Z_TAGS


def is_elem_empty(elem) -> bool:
if elem.VR == "SQ":
for x in elem.value:
for tt in empty_tags:
if tt in x and len(x[tt].value) > 0:
assert is_elem_empty(
x[tt]
), f"({tt[0]:04X},{tt[1]:04x}):{x[tt].value} is not empty"
return True

return elem.value in empty_values


def test_empty_tags_are_emptied(orig_anon_dataset):
orig_ds, anon_ds = orig_anon_dataset

for tt in empty_tags:
if tt in orig_ds:
assert anon_ds[tt].value in empty_values
for tt in empty_tags: # sourcery skip: no-loop-in-tests
if (
tt in orig_ds and len(orig_ds[tt].value) > 0
): # sourcery skip: no-conditionals-in-tests
assert is_elem_empty(
anon_ds[tt]
), f"({tt[0]:04X},{tt[1]:04x}):{anon_ds[tt].value} is not empty"
Loading