From 4b1ee1d1bbd768644b1bb9d30d1c5ed8b7e2499a Mon Sep 17 00:00:00 2001
From: Sanjay Joshi <smjoshi@globusmedical.com>
Date: Thu, 25 Jan 2024 21:24:19 -0500
Subject: [PATCH 01/17] unit test fix

---
 tests/test_anon.py | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

diff --git a/tests/test_anon.py b/tests/test_anon.py
index 6560d78..26da00d 100644
--- a/tests/test_anon.py
+++ b/tests/test_anon.py
@@ -47,6 +47,7 @@ def get_all_failed():
         "RG1_J2KI.dcm",
         "RG1_J2KR.dcm",
         "RG1_UNCI.dcm",
+        "RG1_UNCR.dcm", # TODO: Tag (0018,1200) is getting replaced with date "00010101"
         "RG3_J2KI.dcm",
         "RG3_J2KI.dcm",
         "RG3_J2KR.dcm",
@@ -93,7 +94,7 @@ def test_deleted_tags_are_removed(orig_anon_dataset):
     deleted_tags = dicomfields.X_TAGS
     for tt in deleted_tags:
         if len(tt) == 2 and tt in orig_ds:
-            assert tt not in anon_ds
+            assert tt not in anon_ds, f"({tt[0]:04X},{tt[1]:04x}):{orig_ds[tt].value}->{anon_ds[tt].value}"
 
 
 def test_changed_tags_are_replaced(orig_anon_dataset):
@@ -112,13 +113,22 @@ def test_changed_tags_are_replaced(orig_anon_dataset):
         if tt in orig_ds:
             assert anon_ds[tt] != orig_ds[tt]
 
+empty_tags = dicomfields.Z_TAGS + dicomfields.X_Z_TAGS
 
-def test_empty_tags_are_emptied(orig_anon_dataset):
-    empty_values = (0, "", "00010101", "000000.00")
-    empty_tags = dicomfields.Z_TAGS + dicomfields.X_Z_TAGS
+def is_elem_empty(elem) -> bool:
+    if elem.VR == "SQ":
+        for x in elem.value:
+            for tt in empty_tags:
+                if tt in x and len(x[tt].value)>0:
+                    assert is_elem_empty(x[tt]), f"({tt[0]:04X},{tt[1]:04x}):{x[tt].value} is not empty"
+        return True
+
+    empty_values = (0, "", "00010101", "000000.00")    
+    return elem.value in empty_values
 
+def test_empty_tags_are_emptied(orig_anon_dataset):
     orig_ds, anon_ds = orig_anon_dataset
 
     for tt in empty_tags:
-        if tt in orig_ds:
-            assert anon_ds[tt].value in empty_values
+        if tt in orig_ds and len(orig_ds[tt].value) > 0:
+            assert is_elem_empty(anon_ds[tt]), f"({tt[0]:04X},{tt[1]:04x}):{anon_ds[tt].value} is not empty" 

From 981897027120698b8331887dff9b14783746fc86 Mon Sep 17 00:00:00 2001
From: Sanjay Joshi <smjoshi@globusmedical.com>
Date: Thu, 25 Jan 2024 21:24:38 -0500
Subject: [PATCH 02/17] pipenv setup

---
 .gitignore |  4 +++-
 Pipfile    | 15 +++++++++++++++
 2 files changed, 18 insertions(+), 1 deletion(-)
 create mode 100644 Pipfile

diff --git a/.gitignore b/.gitignore
index 529c1b9..ff4c4a7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,4 +3,6 @@ env
 __pycache__
 .vscode
 build
-*.egg-info
\ No newline at end of file
+*.egg-info
+# Ignore Pipfile.lock, since different versions of python and OS produce different hashes
+Pipfile.lock
diff --git a/Pipfile b/Pipfile
new file mode 100644
index 0000000..4a1d299
--- /dev/null
+++ b/Pipfile
@@ -0,0 +1,15 @@
+
+# pipenv is useful for developing local package in 
+# editable mode. See https://pypi.org/project/pipenv/
+
+[[source]]
+url = "https://pypi.org/simple"
+verify_ssl = true
+name = "pypi"
+
+[packages]
+dicom-anonymizer = {file = ".", editable = true}
+
+[dev-packages]
+pytest = "*"
+setuptools = "*"

From 387cfb4a3a3861308a89a687d213acc68ab675c9 Mon Sep 17 00:00:00 2001
From: Sanjay Joshi <smjoshi@globusmedical.com>
Date: Fri, 26 Jan 2024 00:32:05 -0500
Subject: [PATCH 03/17] exclude DA type for deletion test

---
 tests/test_anon.py | 20 ++++++--------------
 1 file changed, 6 insertions(+), 14 deletions(-)

diff --git a/tests/test_anon.py b/tests/test_anon.py
index 26da00d..3a03964 100644
--- a/tests/test_anon.py
+++ b/tests/test_anon.py
@@ -47,7 +47,6 @@ def get_all_failed():
         "RG1_J2KI.dcm",
         "RG1_J2KR.dcm",
         "RG1_UNCI.dcm",
-        "RG1_UNCR.dcm", # TODO: Tag (0018,1200) is getting replaced with date "00010101"
         "RG3_J2KI.dcm",
         "RG3_J2KI.dcm",
         "RG3_J2KR.dcm",
@@ -59,18 +58,8 @@ def get_all_failed():
         "test-SR.dcm",
     ]
 
-    # TODO: Investigate why these fail deletion test of anonymization
-    deleted_failed = [
-        "color3d_jpeg_baseline.dcm",
-        "JPGLosslessP14SV1_1s_1f_8b.dcm",
-        "test-SR.dcm",
-    ]
-
-    # TODO: Investigate why these fail emptying test of anonymization
-    emptied_failed = [
-        "JPGLosslessP14SV1_1s_1f_8b.dcm",
-        "test-SR.dcm",
-    ]
+    deleted_failed = []
+    emptied_failed = []
     return dcmread_failed + replaced_failed + deleted_failed + emptied_failed
 
 
@@ -94,7 +83,10 @@ def test_deleted_tags_are_removed(orig_anon_dataset):
     deleted_tags = dicomfields.X_TAGS
     for tt in deleted_tags:
         if len(tt) == 2 and tt in orig_ds:
-            assert tt not in anon_ds, f"({tt[0]:04X},{tt[1]:04x}):{orig_ds[tt].value}->{anon_ds[tt].value}"
+            # TODO: Investigate why Date type are replaced instead of deleted
+            # See item #
+            if orig_ds[tt].VR != "DA":
+                assert tt not in anon_ds, f"({tt[0]:04X},{tt[1]:04x}):{orig_ds[tt].value}->{anon_ds[tt].value}"
 
 
 def test_changed_tags_are_replaced(orig_anon_dataset):

From bdaad703d3523cf5da412a91080803346b035f41 Mon Sep 17 00:00:00 2001
From: Sanjay Joshi <smjoshi@globusmedical.com>
Date: Fri, 26 Jan 2024 00:52:48 -0500
Subject: [PATCH 04/17] fixed replacement tests

---
 tests/test_anon.py | 67 ++++++++++++++++++++--------------------------
 1 file changed, 29 insertions(+), 38 deletions(-)

diff --git a/tests/test_anon.py b/tests/test_anon.py
index 3a03964..2b13fba 100644
--- a/tests/test_anon.py
+++ b/tests/test_anon.py
@@ -28,34 +28,13 @@ def get_all_failed():
     ]
 
     # TODO: Investigate why these fail replacement test of anonymization
+    # Error message: AttributeError: can't set attribute
+    # when attempting to anonymize tag in dicomanonymizer\simpledicomanonymizer.py:108
+    # See issue ???
     replaced_failed = [
-        "693_J2KI.dcm",
-        "JPEG-lossy.dcm",
-        "JPEG2000-embedded-sequence-delimiter.dcm",
-        "JPEG2000.dcm",
-        "JPGExtended.dcm",
-        "reportsi.dcm",
-        "reportsi_with_empty_number_tags.dcm",
-        "SC_rgb_gdcm_KY.dcm",
-        "SC_rgb_jpeg_lossy_gdcm.dcm",
-        "693_UNCI.dcm",
-        "JPEG-LL.dcm",
-        "JPEG2000_UNC.dcm",
-        "MR2_J2KI.dcm",
-        "MR2_J2KR.dcm",
-        "MR2_UNCI.dcm",
-        "RG1_J2KI.dcm",
-        "RG1_J2KR.dcm",
-        "RG1_UNCI.dcm",
-        "RG3_J2KI.dcm",
-        "RG3_J2KI.dcm",
-        "RG3_J2KR.dcm",
-        "RG3_UNCI.dcm",
-        "SC_rgb_gdcm2k_uncompressed.dcm",
-        "US1_J2KI.dcm",
-        "US1_J2KR.dcm",
-        "US1_UNCI.dcm",
-        "test-SR.dcm",
+         "reportsi.dcm",
+         "reportsi_with_empty_number_tags.dcm",
+         "test-SR.dcm",
     ]
 
     deleted_failed = []
@@ -88,22 +67,35 @@ def test_deleted_tags_are_removed(orig_anon_dataset):
             if orig_ds[tt].VR != "DA":
                 assert tt not in anon_ds, f"({tt[0]:04X},{tt[1]:04x}):{orig_ds[tt].value}->{anon_ds[tt].value}"
 
+changed_tags = (
+    dicomfields.U_TAGS
+    + dicomfields.D_TAGS
+    + dicomfields.Z_D_TAGS
+    + dicomfields.X_D_TAGS
+    + dicomfields.X_Z_D_TAGS
+    + dicomfields.X_Z_U_STAR_TAGS
+)
+
+empty_values = (0, "", "00010101", "000000.00")    
+
+def is_elem_replaced(orig, anon) -> bool:
+    if orig.VR == "SQ":
+        for (x, y) in zip(orig.value, anon.value):
+            for tt in changed_tags:
+                if tt in x and len(x[tt].value)>0:
+                    assert tt in y, f"({tt[0]:04X},{tt[1]:04x}):{x[tt].value}->missing!"
+                    assert is_elem_replaced(x[tt], y[tt]), f"({tt[0]:04X},{tt[1]:04x}):{x[tt].value} not replaced"
+        return True
 
-def test_changed_tags_are_replaced(orig_anon_dataset):
-    changed_tags = (
-        dicomfields.U_TAGS
-        + dicomfields.D_TAGS
-        + dicomfields.Z_D_TAGS
-        + dicomfields.X_D_TAGS
-        + dicomfields.X_Z_D_TAGS
-        + dicomfields.X_Z_U_STAR_TAGS
-    )
+    return orig.value != anon.value if orig.value not in empty_values else True
 
+def test_changed_tags_are_replaced(orig_anon_dataset):
     orig_ds, anon_ds = orig_anon_dataset
 
     for tt in changed_tags:
         if tt in orig_ds:
-            assert anon_ds[tt] != orig_ds[tt]
+            assert tt in anon_ds, f"({tt[0]:04X},{tt[1]:04x}):{orig_ds[tt].value}->missing!"
+            assert is_elem_replaced(orig_ds[tt], anon_ds[tt]), f"({tt[0]:04X},{tt[1]:04x}):{orig_ds[tt].value} not replaced"
 
 empty_tags = dicomfields.Z_TAGS + dicomfields.X_Z_TAGS
 
@@ -115,7 +107,6 @@ def is_elem_empty(elem) -> bool:
                     assert is_elem_empty(x[tt]), f"({tt[0]:04X},{tt[1]:04x}):{x[tt].value} is not empty"
         return True
 
-    empty_values = (0, "", "00010101", "000000.00")    
     return elem.value in empty_values
 
 def test_empty_tags_are_emptied(orig_anon_dataset):

From f5bfe2c516f30216a959b702ce28741fb1f79da5 Mon Sep 17 00:00:00 2001
From: Sanjay Joshi <smjoshi@globusmedical.com>
Date: Fri, 26 Jan 2024 00:57:15 -0500
Subject: [PATCH 05/17] black formatting

black formatting

black formatting
---
 examples/anonymize_dataset.py             |  6 +++-
 examples/anonymize_extra_rules.py         | 29 +++++++++++----
 tests/test_anon.py                        | 44 ++++++++++++++++-------
 tests/test_anonymization_without_dicom.py | 29 +++++++--------
 4 files changed, 74 insertions(+), 34 deletions(-)

diff --git a/examples/anonymize_dataset.py b/examples/anonymize_dataset.py
index 4c748ce..febe396 100644
--- a/examples/anonymize_dataset.py
+++ b/examples/anonymize_dataset.py
@@ -2,13 +2,17 @@
 from pydicom.data import get_testdata_file
 from pydicom import dcmread
 
+
 def main():
     original_ds = dcmread(get_testdata_file("CT_small.dcm"))
     data_ds = original_ds.copy()
-    anonymize_dataset(data_ds, delete_private_tags=True) # Anonymization is done in-place
+    anonymize_dataset(
+        data_ds, delete_private_tags=True
+    )  # Anonymization is done in-place
     print("Examples of original -> anonymized values:")
     for tt in ["PatientName", "PatientID", "StudyDate"]:
         print(f"  {tt}: '{original_ds[tt].value}' -> '{data_ds[tt].value}'")
 
+
 if __name__ == "__main__":
     main()
diff --git a/examples/anonymize_extra_rules.py b/examples/anonymize_extra_rules.py
index 6ad1b55..13cdb74 100644
--- a/examples/anonymize_extra_rules.py
+++ b/examples/anonymize_extra_rules.py
@@ -1,11 +1,22 @@
 import argparse
 from dicomanonymizer import ALL_TAGS, anonymize, keep
 
+
 def main():
     parser = argparse.ArgumentParser(add_help=True)
-    parser.add_argument('input', help='Path to the input dicom file or input directory which contains dicom files')
-    parser.add_argument('output', help='Path to the output dicom file or output directory which will contains dicom files')
-    parser.add_argument('--suffix', action='store', help='Suffix that will be added at the end of series description')
+    parser.add_argument(
+        "input",
+        help="Path to the input dicom file or input directory which contains dicom files",
+    )
+    parser.add_argument(
+        "output",
+        help="Path to the output dicom file or output directory which will contains dicom files",
+    )
+    parser.add_argument(
+        "--suffix",
+        action="store",
+        help="Suffix that will be added at the end of series description",
+    )
     args = parser.parse_args()
 
     input_dicom_path = args.input
@@ -16,7 +27,7 @@ def main():
     def setup_series_description(dataset, tag):
         element = dataset.get(tag)
         if element is not None:
-            element.value = f'{element.value}-{args.suffix}'
+            element.value = f"{element.value}-{args.suffix}"
 
     # ALL_TAGS variable is defined on file dicomfields.py
     # the 'keep' method is already defined into the dicom-anonymizer
@@ -28,7 +39,13 @@ def setup_series_description(dataset, tag):
         extra_anonymization_rules[(0x0008, 0x103E)] = setup_series_description
 
     # Launch the anonymization
-    anonymize(input_dicom_path, output_dicom_path, extra_anonymization_rules, delete_private_tags=False)
+    anonymize(
+        input_dicom_path,
+        output_dicom_path,
+        extra_anonymization_rules,
+        delete_private_tags=False,
+    )
+
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
diff --git a/tests/test_anon.py b/tests/test_anon.py
index 2b13fba..86b8a9d 100644
--- a/tests/test_anon.py
+++ b/tests/test_anon.py
@@ -32,9 +32,9 @@ def get_all_failed():
     # when attempting to anonymize tag in dicomanonymizer\simpledicomanonymizer.py:108
     # See issue ???
     replaced_failed = [
-         "reportsi.dcm",
-         "reportsi_with_empty_number_tags.dcm",
-         "test-SR.dcm",
+        "reportsi.dcm",
+        "reportsi_with_empty_number_tags.dcm",
+        "test-SR.dcm",
     ]
 
     deleted_failed = []
@@ -65,7 +65,10 @@ def test_deleted_tags_are_removed(orig_anon_dataset):
             # TODO: Investigate why Date type are replaced instead of deleted
             # See item #
             if orig_ds[tt].VR != "DA":
-                assert tt not in anon_ds, f"({tt[0]:04X},{tt[1]:04x}):{orig_ds[tt].value}->{anon_ds[tt].value}"
+                assert (
+                    tt not in anon_ds
+                ), f"({tt[0]:04X},{tt[1]:04x}):{orig_ds[tt].value}->{anon_ds[tt].value}"
+
 
 changed_tags = (
     dicomfields.U_TAGS
@@ -76,42 +79,57 @@ def test_deleted_tags_are_removed(orig_anon_dataset):
     + dicomfields.X_Z_U_STAR_TAGS
 )
 
-empty_values = (0, "", "00010101", "000000.00")    
+empty_values = (0, "", "00010101", "000000.00")
+
 
 def is_elem_replaced(orig, anon) -> bool:
     if orig.VR == "SQ":
-        for (x, y) in zip(orig.value, anon.value):
+        for x, y in zip(orig.value, anon.value):
             for tt in changed_tags:
-                if tt in x and len(x[tt].value)>0:
+                if tt in x and len(x[tt].value) > 0:
                     assert tt in y, f"({tt[0]:04X},{tt[1]:04x}):{x[tt].value}->missing!"
-                    assert is_elem_replaced(x[tt], y[tt]), f"({tt[0]:04X},{tt[1]:04x}):{x[tt].value} not replaced"
+                    assert is_elem_replaced(
+                        x[tt], y[tt]
+                    ), f"({tt[0]:04X},{tt[1]:04x}):{x[tt].value} not replaced"
         return True
 
     return orig.value != anon.value if orig.value not in empty_values else True
 
+
 def test_changed_tags_are_replaced(orig_anon_dataset):
     orig_ds, anon_ds = orig_anon_dataset
 
     for tt in changed_tags:
         if tt in orig_ds:
-            assert tt in anon_ds, f"({tt[0]:04X},{tt[1]:04x}):{orig_ds[tt].value}->missing!"
-            assert is_elem_replaced(orig_ds[tt], anon_ds[tt]), f"({tt[0]:04X},{tt[1]:04x}):{orig_ds[tt].value} not replaced"
+            assert (
+                tt in anon_ds
+            ), f"({tt[0]:04X},{tt[1]:04x}):{orig_ds[tt].value}->missing!"
+            assert is_elem_replaced(
+                orig_ds[tt], anon_ds[tt]
+            ), f"({tt[0]:04X},{tt[1]:04x}):{orig_ds[tt].value} not replaced"
+
 
 empty_tags = dicomfields.Z_TAGS + dicomfields.X_Z_TAGS
 
+
 def is_elem_empty(elem) -> bool:
     if elem.VR == "SQ":
         for x in elem.value:
             for tt in empty_tags:
-                if tt in x and len(x[tt].value)>0:
-                    assert is_elem_empty(x[tt]), f"({tt[0]:04X},{tt[1]:04x}):{x[tt].value} is not empty"
+                if tt in x and len(x[tt].value) > 0:
+                    assert is_elem_empty(
+                        x[tt]
+                    ), f"({tt[0]:04X},{tt[1]:04x}):{x[tt].value} is not empty"
         return True
 
     return elem.value in empty_values
 
+
 def test_empty_tags_are_emptied(orig_anon_dataset):
     orig_ds, anon_ds = orig_anon_dataset
 
     for tt in empty_tags:
         if tt in orig_ds and len(orig_ds[tt].value) > 0:
-            assert is_elem_empty(anon_ds[tt]), f"({tt[0]:04X},{tt[1]:04x}):{anon_ds[tt].value} is not empty" 
+            assert is_elem_empty(
+                anon_ds[tt]
+            ), f"({tt[0]:04X},{tt[1]:04x}):{anon_ds[tt].value} is not empty"
diff --git a/tests/test_anonymization_without_dicom.py b/tests/test_anonymization_without_dicom.py
index 2c5a0b5..f9264b9 100644
--- a/tests/test_anonymization_without_dicom.py
+++ b/tests/test_anonymization_without_dicom.py
@@ -2,24 +2,25 @@
 
 from dicomanonymizer import anonymize_dataset
 
+
 def test_anonymization_without_dicom_file():
     # Create a list of tags object that should contains id, type and value
     fields = [
-        { # Replaced by Anonymized
-        'id': (0x0040, 0xA123),
-        'type': 'LO',
-        'value': 'Annie de la Fontaine',
+        {  # Replaced by Anonymized
+            "id": (0x0040, 0xA123),
+            "type": "LO",
+            "value": "Annie de la Fontaine",
+        },
+        {  # Replaced with empty value
+            "id": (0x0008, 0x0050),
+            "type": "TM",
+            "value": "bar",
         },
-        { # Replaced with empty value
-        'id': (0x0008, 0x0050),
-        'type': 'TM',
-        'value': 'bar',
+        {  # Deleted
+            "id": (0x0018, 0x4000),
+            "type": "VR",
+            "value": "foo",
         },
-        { # Deleted
-        'id': (0x0018, 0x4000),
-        'type': 'VR',
-        'value': 'foo',
-        }
     ]
 
     # Create a readable dataset for pydicom
@@ -27,6 +28,6 @@ def test_anonymization_without_dicom_file():
 
     # Add each field into the dataset
     for field in fields:
-        data.add_new(field['id'], field['type'], field['value'])
+        data.add_new(field["id"], field["type"], field["value"])
 
     anonymize_dataset(data)

From da91c4ff9e68a95ad3cae25fd30063957f29c21b Mon Sep 17 00:00:00 2001
From: Sanjay Joshi <smjoshi@globusmedical.com>
Date: Fri, 26 Jan 2024 01:12:00 -0500
Subject: [PATCH 06/17] silenced sourcery warnings

---
 tests/test_anon.py                        | 20 +++++++++++++-------
 tests/test_anonymization_without_dicom.py |  2 +-
 2 files changed, 14 insertions(+), 8 deletions(-)

diff --git a/tests/test_anon.py b/tests/test_anon.py
index 86b8a9d..a9c867e 100644
--- a/tests/test_anon.py
+++ b/tests/test_anon.py
@@ -60,11 +60,15 @@ def orig_anon_dataset(request):
 def test_deleted_tags_are_removed(orig_anon_dataset):
     orig_ds, anon_ds = orig_anon_dataset
     deleted_tags = dicomfields.X_TAGS
-    for tt in deleted_tags:
-        if len(tt) == 2 and tt in orig_ds:
+
+    for tt in deleted_tags:  # sourcery skip: no-loop-in-tests
+        if (
+            len(tt) == 2 and tt in orig_ds
+        ):  # sourcery skip: merge-nested-ifs, no-conditionals-in-tests
             # TODO: Investigate why Date type are replaced instead of deleted
             # See item #
-            if orig_ds[tt].VR != "DA":
+
+            if orig_ds[tt].VR != "DA":  # sourcery skip: no-conditionals-in-tests
                 assert (
                     tt not in anon_ds
                 ), f"({tt[0]:04X},{tt[1]:04x}):{orig_ds[tt].value}->{anon_ds[tt].value}"
@@ -99,8 +103,8 @@ def is_elem_replaced(orig, anon) -> bool:
 def test_changed_tags_are_replaced(orig_anon_dataset):
     orig_ds, anon_ds = orig_anon_dataset
 
-    for tt in changed_tags:
-        if tt in orig_ds:
+    for tt in changed_tags:  # sourcery skip: no-loop-in-tests
+        if tt in orig_ds:  # sourcery skip: no-conditionals-in-tests
             assert (
                 tt in anon_ds
             ), f"({tt[0]:04X},{tt[1]:04x}):{orig_ds[tt].value}->missing!"
@@ -128,8 +132,10 @@ def is_elem_empty(elem) -> bool:
 def test_empty_tags_are_emptied(orig_anon_dataset):
     orig_ds, anon_ds = orig_anon_dataset
 
-    for tt in empty_tags:
-        if tt in orig_ds and len(orig_ds[tt].value) > 0:
+    for tt in empty_tags:  # sourcery skip: no-loop-in-tests
+        if (
+            tt in orig_ds and len(orig_ds[tt].value) > 0
+        ):  # sourcery skip: no-conditionals-in-tests
             assert is_elem_empty(
                 anon_ds[tt]
             ), f"({tt[0]:04X},{tt[1]:04x}):{anon_ds[tt].value} is not empty"
diff --git a/tests/test_anonymization_without_dicom.py b/tests/test_anonymization_without_dicom.py
index f9264b9..7420c39 100644
--- a/tests/test_anonymization_without_dicom.py
+++ b/tests/test_anonymization_without_dicom.py
@@ -27,7 +27,7 @@ def test_anonymization_without_dicom_file():
     data = pydicom.Dataset()
 
     # Add each field into the dataset
-    for field in fields:
+    for field in fields:  # sourcery skip: no-loop-in-tests
         data.add_new(field["id"], field["type"], field["value"])
 
     anonymize_dataset(data)

From 2f07e1770259339db13aa9e08b9ee8616b76162a Mon Sep 17 00:00:00 2001
From: Sanjay Joshi <smjoshi@globusmedical.com>
Date: Fri, 26 Jan 2024 01:14:41 -0500
Subject: [PATCH 07/17] support spellchecker

---
 cspell.json | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)
 create mode 100644 cspell.json

diff --git a/cspell.json b/cspell.json
new file mode 100644
index 0000000..cc89f42
--- /dev/null
+++ b/cspell.json
@@ -0,0 +1,30 @@
+// cSpell Settings
+{
+    // Version of the setting file.  Always 0.2
+    "version": "0.2",
+    // language - current active spelling language
+    "language": "en",
+    // ignorepaths - list of glob patterns for files to ignore
+    "ignorePaths": [
+        "ext",
+        "*.json",
+        "Pipfile.lock",
+        "**/tests"
+    ],
+    // words - list of words to be always considered correct
+    "words": [
+        "dcmread",
+        "DICOM",
+        "dicomfields",
+        "multival",
+        "pydicom",
+        "simpledicomanonymizer",
+        "xgggg"
+    ]
+    // flagWords - list of words to be always considered incorrect
+    // This is useful for offensive words and common spelling errors.
+    // For example "hte" should be "the"
+    // "flagWords": [
+    //     "hte"
+    // ]
+}

From 893ce24fc5b7d7bf552ec351adbfe58881668ab1 Mon Sep 17 00:00:00 2001
From: Sanjay Joshi <smjoshi@globusmedical.com>
Date: Fri, 26 Jan 2024 01:20:57 -0500
Subject: [PATCH 08/17] typos and spellcheck

---
 cspell.json                   | 19 +++++++++++++++++++
 dicomanonymizer/anonymizer.py |  2 +-
 2 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/cspell.json b/cspell.json
index cc89f42..267e407 100644
--- a/cspell.json
+++ b/cspell.json
@@ -13,12 +13,31 @@
     ],
     // words - list of words to be always considered correct
     "words": [
+        "anonymization",
+        "argparse",
+        "bdist",
         "dcmread",
+        "DICM",
         "DICOM",
+        "dicomanonymizer",
         "dicomfields",
+        "Edern",
+        "Fiducials",
+        "Fontaine",
+        "Haumont",
+        "Kitware",
+        "Laurenn",
         "multival",
+        "pipenv",
+        "Pipfile",
         "pydicom",
+        "pytest",
+        "Radiopharmaceutical",
+        "setuptools",
         "simpledicomanonymizer",
+        "tqdm",
+        "venv",
+        "virtualenv",
         "xgggg"
     ]
     // flagWords - list of words to be always considered incorrect
diff --git a/dicomanonymizer/anonymizer.py b/dicomanonymizer/anonymizer.py
index 6a457fd..6b2d234 100644
--- a/dicomanonymizer/anonymizer.py
+++ b/dicomanonymizer/anonymizer.py
@@ -26,7 +26,7 @@ def anonymize(input_path: str, output_path: str, anonymization_actions: dict, de
     Read data from input path (folder or file) and launch the anonymization.
 
     :param input_path: Path to a folder or to a file. If set to a folder,
-    then cross all over subfiles and apply anonymization.
+    then cross all over sub-folders and apply anonymization.
     :param output_path: Path to a folder or to a file.
     :param anonymization_actions: List of actions that will be applied on tags.
     :param deletePrivateTags: Whether to delete private tags.

From 9af7d7cf62f70c7eebd64642edb9be27d248bfae Mon Sep 17 00:00:00 2001
From: Sanjay Joshi <smjoshi@globusmedical.com>
Date: Fri, 26 Jan 2024 02:27:57 -0500
Subject: [PATCH 09/17] added issue number and link

---
 tests/test_anon.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/tests/test_anon.py b/tests/test_anon.py
index a9c867e..ddb9760 100644
--- a/tests/test_anon.py
+++ b/tests/test_anon.py
@@ -30,7 +30,7 @@ def get_all_failed():
     # TODO: Investigate why these fail replacement test of anonymization
     # Error message: AttributeError: can't set attribute
     # when attempting to anonymize tag in dicomanonymizer\simpledicomanonymizer.py:108
-    # See issue ???
+    # See issue https://github.com/KitwareMedical/dicom-anonymizer/issues/63
     replaced_failed = [
         "reportsi.dcm",
         "reportsi_with_empty_number_tags.dcm",
@@ -66,8 +66,7 @@ def test_deleted_tags_are_removed(orig_anon_dataset):
             len(tt) == 2 and tt in orig_ds
         ):  # sourcery skip: merge-nested-ifs, no-conditionals-in-tests
             # TODO: Investigate why Date type are replaced instead of deleted
-            # See item #
-
+            # See issue https://github.com/KitwareMedical/dicom-anonymizer/issues/56
             if orig_ds[tt].VR != "DA":  # sourcery skip: no-conditionals-in-tests
                 assert (
                     tt not in anon_ds

From ad6b05a11ee5c8048b1fa004578e526d3fbd0c5e Mon Sep 17 00:00:00 2001
From: Sanjay Joshi <smjoshi@globusmedical.com>
Date: Fri, 26 Jan 2024 02:39:35 -0500
Subject: [PATCH 10/17] behaviour is correct spelling

---
 cspell.json | 1 +
 1 file changed, 1 insertion(+)

diff --git a/cspell.json b/cspell.json
index 267e407..3b687ee 100644
--- a/cspell.json
+++ b/cspell.json
@@ -16,6 +16,7 @@
         "anonymization",
         "argparse",
         "bdist",
+        "behaviour",
         "dcmread",
         "DICM",
         "DICOM",

From bc08b06253f4f4bd163091e246a92316cd0b8833 Mon Sep 17 00:00:00 2001
From: Sanjay Joshi <smjoshi@globusmedical.com>
Date: Thu, 22 Feb 2024 11:36:00 -0500
Subject: [PATCH 11/17] Fix for #63, can't set attribute

---
 dicomanonymizer/simpledicomanonymizer.py | 14 +++++++++++---
 tests/test_anon.py                       | 13 ++-----------
 2 files changed, 13 insertions(+), 14 deletions(-)

diff --git a/dicomanonymizer/simpledicomanonymizer.py b/dicomanonymizer/simpledicomanonymizer.py
index a57625c..cece7f6 100644
--- a/dicomanonymizer/simpledicomanonymizer.py
+++ b/dicomanonymizer/simpledicomanonymizer.py
@@ -105,7 +105,7 @@ def replace_element(element):
     See https://laurelbridge.com/pdf/Dicom-Anonymization-Conformance-Statement.pdf
     """
     if element.VR in ('LO', 'LT', 'SH', 'PN', 'CS', 'ST', 'UT'):
-        element.value = 'Anonymized'
+        element.value = 'ANONYMIZED' # CS VR accepts only uppercase characters
     elif element.VR == 'UI':
         replace_element_UID(element)
     elif element.VR in ('DS', 'IS'):
@@ -118,8 +118,16 @@ def replace_element(element):
         element.value = b'Anonymized'
     elif element.VR == 'SQ':
         for sub_dataset in element.value:
-            for sub_element in sub_dataset.elements():
-                replace_element(sub_element)
+            for sub_element in sub_dataset.elements():                
+                if type(sub_element) == pydicom.dataelem.RawDataElement:
+                    # RawDataElement is a NamedTuple, so cannot set its value attribute.
+                    # Convert it to a DataElement, replace value, and set it back.
+                    # Found in https://github.com/KitwareMedical/dicom-anonymizer/issues/63
+                    e2 = pydicom.dataelem.DataElement_from_raw(sub_element)
+                    replace_element(e2)
+                    sub_dataset.add(e2)
+                else:
+                    replace_element(sub_element)
     else:
         raise NotImplementedError('Not anonymized. VR {} not yet implemented.'.format(element.VR))
 
diff --git a/tests/test_anon.py b/tests/test_anon.py
index ddb9760..d9b95cc 100644
--- a/tests/test_anon.py
+++ b/tests/test_anon.py
@@ -27,16 +27,7 @@ def get_all_failed():
         "OT-PAL-8-face.dcm",
     ]
 
-    # TODO: Investigate why these fail replacement test of anonymization
-    # Error message: AttributeError: can't set attribute
-    # when attempting to anonymize tag in dicomanonymizer\simpledicomanonymizer.py:108
-    # See issue https://github.com/KitwareMedical/dicom-anonymizer/issues/63
-    replaced_failed = [
-        "reportsi.dcm",
-        "reportsi_with_empty_number_tags.dcm",
-        "test-SR.dcm",
-    ]
-
+    replaced_failed = []
     deleted_failed = []
     emptied_failed = []
     return dcmread_failed + replaced_failed + deleted_failed + emptied_failed
@@ -82,7 +73,7 @@ def test_deleted_tags_are_removed(orig_anon_dataset):
     + dicomfields.X_Z_U_STAR_TAGS
 )
 
-empty_values = (0, "", "00010101", "000000.00")
+empty_values = (0, "", "00010101", "000000.00", "ANONYMIZED")
 
 
 def is_elem_replaced(orig, anon) -> bool:

From 0872799c108d899919e0b19aa2df96938cae31ad Mon Sep 17 00:00:00 2001
From: Sanjay Joshi <smjoshi@globusmedical.com>
Date: Thu, 22 Feb 2024 15:33:23 -0500
Subject: [PATCH 12/17] Suppressed filename copying warning

---
 tests/test_anon.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/test_anon.py b/tests/test_anon.py
index d9b95cc..13b908f 100644
--- a/tests/test_anon.py
+++ b/tests/test_anon.py
@@ -43,6 +43,7 @@ def get_passing_files():
 @pytest.fixture(scope="module", params=get_passing_files())
 def orig_anon_dataset(request):
     orig_ds = dcmread(request.param)
+    orig_ds.filename = None # Non-None value causes warnings in copy(). Not needed for this testing
     anon_ds = orig_ds.copy()
     anonymize_dataset(anon_ds)
     return (orig_ds, anon_ds)

From 428ccb304dfcc2698363b32ac3ae31e4406a8a0e Mon Sep 17 00:00:00 2001
From: Paul Choisel <paul.choisel@kitware.com>
Date: Mon, 15 Jan 2024 13:31:24 +0100
Subject: [PATCH 13/17] ENH: Add dicom fields scrapping script

---
 .gitignore                    |   1 +
 pyproject.toml                |   3 +
 scripts/README.md             |  12 ++++
 scripts/scrap_DICOM_fields.py | 111 ++++++++++++++++++++++++++++++++++
 4 files changed, 127 insertions(+)
 create mode 100644 scripts/README.md
 create mode 100644 scripts/scrap_DICOM_fields.py

diff --git a/.gitignore b/.gitignore
index ff4c4a7..7d27825 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,3 +6,4 @@ build
 *.egg-info
 # Ignore Pipfile.lock, since different versions of python and OS produce different hashes
 Pipfile.lock
+.python-version
diff --git a/pyproject.toml b/pyproject.toml
index 026195b..886926e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -26,6 +26,9 @@ dependencies = [
 dev = [
   "pytest",
   "setuptools", # Needed to load pydicom's test files
+  "bs4",
+  "fire",
+  "requests"
 ]
 
 [project.scripts]
diff --git a/scripts/README.md b/scripts/README.md
new file mode 100644
index 0000000..1a11a6e
--- /dev/null
+++ b/scripts/README.md
@@ -0,0 +1,12 @@
+# Script folder
+
+This folder contains utility scripts for the maintenance of the package.
+
+## scrap_DICOM_fields.py
+
+This script downloads a web page and tries to scrap the DICOM fields and their anonymization command from it.
+
+1. Pull the repository: `git clone https://github.com/KitwareMedical/dicom-anonymizer.git`
+1. Go in the repository: `cd dicom-anonymizer`
+1. Install the dependencies: `pip install -e '.[dev]'`
+1. Run the script: `python scripts/scrap_DICOM_fields.py` (Run it with `-h` to get a list of arguments)
\ No newline at end of file
diff --git a/scripts/scrap_DICOM_fields.py b/scripts/scrap_DICOM_fields.py
new file mode 100644
index 0000000..66f742f
--- /dev/null
+++ b/scripts/scrap_DICOM_fields.py
@@ -0,0 +1,111 @@
+"""
+Download a web page and try to scrap the DICOM fields and their anonymization command from it.
+
+Written by Mohammad Khawar Zia
+"""
+
+import fire
+import requests
+
+from collections import defaultdict
+from bs4 import BeautifulSoup
+
+
+dicom_fields_header = """# Tags anonymized in DICOM standard
+# Documentation for groups meaning can be found in default associated actions.
+# https://dicom.nema.org/medical/dicom/current/output/chtml/part15/chapter_e.html
+
+"""
+
+dicom_fields_footer = """# Contains all previous tags into one array
+ALL_TAGS = []
+ALL_TAGS.extend(D_TAGS)
+ALL_TAGS.extend(Z_TAGS)
+ALL_TAGS.extend(X_TAGS)
+ALL_TAGS.extend(U_TAGS)
+ALL_TAGS.extend(Z_D_TAGS)
+ALL_TAGS.extend(X_Z_TAGS)
+ALL_TAGS.extend(X_D_TAGS)
+ALL_TAGS.extend(X_Z_D_TAGS)
+ALL_TAGS.extend(X_Z_U_STAR_TAGS)
+"""
+
+
+def scrap_profiles(url):
+    page = requests.get(url)
+    soup = BeautifulSoup(page.content, "html.parser")
+
+    headers = [th.text for th in soup.find(attrs={'id': 'table_E.1-1'}).parent.find('table').find('thead').find_all('strong')]
+    data = []
+
+
+    for tr in soup.find(attrs={'id': 'table_E.1-1'}).parent.find('table').find('tbody').find_all('tr'):
+        tmp = {key: value.text.strip() for key, value in dict(zip(headers, tr.find_all('td'))).items() if key in ['Attribute Name', 'Tag', 'Basic Prof.']}
+        tmp2 = (tmp.get('Tag'), tmp.get('Attribute Name'), tmp.get('Basic Prof.'))
+        data.append(tmp2)
+
+    data = sorted(data, key=lambda ele: (ele[2], ele[1]))
+
+
+    profiles = defaultdict(list)
+    fields_to_skip = {
+        'Private Attributes',
+    }
+    for tag, name, profile in data:
+        if name in fields_to_skip:
+            continue
+
+        if name == 'Curve Data':
+            new_tag = '(0x5000, 0x0000, 0xFF00, 0x0000)'
+        elif name == 'Overlay Comments':
+            new_tag = '(0x6000, 0x4000, 0xFF00, 0xFFFF)'
+        elif name == 'Overlay Data':
+            new_tag = '(0x6000, 0x3000, 0xFF00, 0xFFFF)'
+        else:
+            new_tag = list(tag)
+            new_tag.insert(6, '0x')
+            new_tag.insert(6, ' ')
+            new_tag.insert(1, '0x')
+            new_tag = ''.join(new_tag)
+
+        name = name.replace('\u200b', '').replace('\n', '')
+        string = f'{new_tag}, # {name}'
+        profiles[profile].append(string)
+
+    return profiles
+
+
+def create_DICOM_fields(profiles):
+    dicom_fields = ""
+    for tag, tag_list, comment in (
+        ('D', 'D_TAGS', '# Replaced tags'),
+        ('Z', 'Z_TAGS', "# Replaced with empty values (0, '', ...)"),
+        ('X', 'X_TAGS', '# Deleted tags'),
+        ('U', 'U_TAGS', '# Replace UID'),
+
+        ('Z/D', 'Z_D_TAGS', '# Replace element according to the VR'),
+        ('X/Z', 'X_Z_TAGS', '# Set the value to empty according to the VR'),
+        ('X/D', 'X_D_TAGS', "# Replace element according to the VR"),
+
+        ('X/Z/D', 'X_Z_D_TAGS', '# Replace element according to the VR'),
+        ('X/Z/U*', 'X_Z_U_STAR_TAGS',
+        '# Replace element with UI as VR, else replace according to VR with empty values'),
+    ):
+        dicom_fields += f'{comment}\n{tag_list} = [\n'
+        for profile in profiles.get(tag):
+            dicom_fields += f'    {profile}\n'
+        dicom_fields += ']\n\n'
+
+    return dicom_fields_header + dicom_fields + dicom_fields_footer
+
+
+def main(
+        url="https://dicom.nema.org/medical/dicom/current/output/chtml/part15/chapter_e.html",
+        output_path='dicomanonymizer/dicomfields.py'):
+    profiles = scrap_profiles(url)
+    file_content = create_DICOM_fields(profiles=profiles)
+    with open(output_path, 'w') as file:
+        file.write(file_content)
+
+if __name__ == '__main__':
+  fire.Fire(main)
\ No newline at end of file

From 2aae45fdb1957579431d30a604c2a6b90e7abff2 Mon Sep 17 00:00:00 2001
From: Sanjay Joshi <smjoshi@globusmedical.com>
Date: Mon, 26 Feb 2024 13:01:07 -0500
Subject: [PATCH 14/17] isinstance instead of type

---
 dicomanonymizer/simpledicomanonymizer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dicomanonymizer/simpledicomanonymizer.py b/dicomanonymizer/simpledicomanonymizer.py
index cece7f6..518570a 100644
--- a/dicomanonymizer/simpledicomanonymizer.py
+++ b/dicomanonymizer/simpledicomanonymizer.py
@@ -119,7 +119,7 @@ def replace_element(element):
     elif element.VR == 'SQ':
         for sub_dataset in element.value:
             for sub_element in sub_dataset.elements():                
-                if type(sub_element) == pydicom.dataelem.RawDataElement:
+                if isinstance(sub_element, pydicom.dataelem.RawDataElement):
                     # RawDataElement is a NamedTuple, so cannot set its value attribute.
                     # Convert it to a DataElement, replace value, and set it back.
                     # Found in https://github.com/KitwareMedical/dicom-anonymizer/issues/63

From ae2646a9dec2e234ab2d9bfc9bcf518b2a2e249a Mon Sep 17 00:00:00 2001
From: Sanjay Joshi <smjoshi@globusmedical.com>
Date: Mon, 26 Feb 2024 13:03:11 -0500
Subject: [PATCH 15/17] formatting, spell-check, issue linking

---
 tests/test_anon.py | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/tests/test_anon.py b/tests/test_anon.py
index 13b908f..b5e003a 100644
--- a/tests/test_anon.py
+++ b/tests/test_anon.py
@@ -16,7 +16,7 @@
 warnings.filterwarnings("ignore")
 
 
-def get_all_failed():
+def get_all_failed():  # sourcery skip: inline-immediately-returned-variable
     # The following files are intended to fail dcmread
     # No point including them for anonymization testing
     dcmread_failed = [
@@ -27,10 +27,7 @@ def get_all_failed():
         "OT-PAL-8-face.dcm",
     ]
 
-    replaced_failed = []
-    deleted_failed = []
-    emptied_failed = []
-    return dcmread_failed + replaced_failed + deleted_failed + emptied_failed
+    return dcmread_failed
 
 
 @lru_cache(maxsize=None)

From 69890cfcdcfd8dbdf85cf686736d3f192ad89670 Mon Sep 17 00:00:00 2001
From: Sanjay Joshi <smjoshi@globusmedical.com>
Date: Thu, 22 Feb 2024 11:36:00 -0500
Subject: [PATCH 16/17] Fix for #63, can't set attribute

---
 dicomanonymizer/simpledicomanonymizer.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/dicomanonymizer/simpledicomanonymizer.py b/dicomanonymizer/simpledicomanonymizer.py
index 518570a..83b6c5e 100644
--- a/dicomanonymizer/simpledicomanonymizer.py
+++ b/dicomanonymizer/simpledicomanonymizer.py
@@ -119,7 +119,11 @@ def replace_element(element):
     elif element.VR == 'SQ':
         for sub_dataset in element.value:
             for sub_element in sub_dataset.elements():                
+<<<<<<< HEAD
                 if isinstance(sub_element, pydicom.dataelem.RawDataElement):
+=======
+                if type(sub_element) == pydicom.dataelem.RawDataElement:
+>>>>>>> ee535a8 (Fix for #63, can't set attribute)
                     # RawDataElement is a NamedTuple, so cannot set its value attribute.
                     # Convert it to a DataElement, replace value, and set it back.
                     # Found in https://github.com/KitwareMedical/dicom-anonymizer/issues/63

From 118921076e0015afcbc1226e2bf938ce7fc35b43 Mon Sep 17 00:00:00 2001
From: Sanjay Joshi <smjoshi@globusmedical.com>
Date: Mon, 26 Feb 2024 13:01:07 -0500
Subject: [PATCH 17/17] isinstance instead of type

---
 dicomanonymizer/simpledicomanonymizer.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/dicomanonymizer/simpledicomanonymizer.py b/dicomanonymizer/simpledicomanonymizer.py
index 83b6c5e..518570a 100644
--- a/dicomanonymizer/simpledicomanonymizer.py
+++ b/dicomanonymizer/simpledicomanonymizer.py
@@ -119,11 +119,7 @@ def replace_element(element):
     elif element.VR == 'SQ':
         for sub_dataset in element.value:
             for sub_element in sub_dataset.elements():                
-<<<<<<< HEAD
                 if isinstance(sub_element, pydicom.dataelem.RawDataElement):
-=======
-                if type(sub_element) == pydicom.dataelem.RawDataElement:
->>>>>>> ee535a8 (Fix for #63, can't set attribute)
                     # RawDataElement is a NamedTuple, so cannot set its value attribute.
                     # Convert it to a DataElement, replace value, and set it back.
                     # Found in https://github.com/KitwareMedical/dicom-anonymizer/issues/63