WIP Draft integration tests
CBroz1 committed May 4, 2022
commit 2f842b278dbb080843ae852a1f26f89cc4d07bca
34 changes: 26 additions & 8 deletions README.md
@@ -19,9 +19,13 @@ convention, and directory lookup methods (see
[workflow_miniscope/ingest.py](workflow_miniscope/ingest.py))
3. Processing results.

See the [Element Miniscope documentation](https://elements.datajoint.org/description/miniscope/) for the background information and development timeline.
See the
[Element Miniscope documentation](https://elements.datajoint.org/description/miniscope/)
for the background information and development timeline.

For more information on the DataJoint Elements project, please visit https://elements.datajoint.org. This work is supported by the National Institutes of Health.
For more information on the DataJoint Elements project, please visit
https://elements.datajoint.org. This work is supported by the National Institutes of
Health.

## Workflow architecture

@@ -41,21 +45,35 @@ DataJoint Elements ([element-lab](https://github.com/datajoint/element-lab),

## Interacting with the DataJoint workflow

+ Our [YouTube tutorial](https://www.youtube.com/watch?v=nWUcPFZOSVw) walks through all
the key details of this workflow.

+ Please refer to the following workflow-specific
[Jupyter notebooks](/notebooks) for an in-depth explanation of how to run the
workflow ([03-process.ipynb](notebooks/03-process.ipynb)) and explore the data
([05-explore.ipynb](notebooks/05-explore.ipynb)).
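+ For orientation, a minimal sketch of driving the same steps from a Python session
  (rather than the notebooks) is shown below. It assumes `workflow_miniscope.pipeline`
  and `workflow_miniscope.ingest` expose the names used in this repository's tests
  (`miniscope`, `ingest_subjects`, `ingest_sessions`); the CSV paths are illustrative.

```python
# Sketch only: ingest subject/session metadata from CSVs, then populate tables.
from workflow_miniscope.pipeline import miniscope
from workflow_miniscope.ingest import ingest_subjects, ingest_sessions

ingest_subjects('./user_data/subjects.csv')   # subject, sex, subject_birth_date, description
ingest_sessions('./user_data/sessions.csv')   # subject, session_dir, acquisition_software

miniscope.RecordingInfo.populate(display_progress=True)  # parse recording metadata

# After defining a ProcessingParamSet and inserting a ProcessingTask
# (see notebooks/03-process.ipynb), trigger the processing step:
miniscope.Processing.populate(display_progress=True)
```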


## Citation

+ If your work uses DataJoint and DataJoint Elements, please cite the respective Research Resource Identifiers (RRIDs) and manuscripts.
+ If your work uses DataJoint and DataJoint Elements, please cite the respective
Research Resource Identifiers (RRIDs) and manuscripts.

+ DataJoint for Python or MATLAB
+ Yatsenko D, Reimer J, Ecker AS, Walker EY, Sinz F, Berens P, Hoenselaar A, Cotton RJ, Siapas AS, Tolias AS. DataJoint: managing big scientific data using MATLAB or Python. bioRxiv. 2015 Jan 1:031658. doi: https://doi.org/10.1101/031658
+ Yatsenko D, Reimer J, Ecker AS, Walker EY, Sinz F, Berens P, Hoenselaar A, Cotton
RJ, Siapas AS, Tolias AS. DataJoint: managing big scientific data using MATLAB or
Python. bioRxiv. 2015 Jan 1:031658. doi: https://doi.org/10.1101/031658

+ DataJoint ([RRID:SCR_014543](https://scicrunch.org/resolver/SCR_014543)) - DataJoint for `<Select Python or MATLAB>` (version `<Enter version number>`)
+ DataJoint (
[RRID:SCR_014543](https://scicrunch.org/resolver/SCR_014543)) - DataJoint for `<Select Python or MATLAB>` (version
`<Enter version number>`)

+ DataJoint Elements
+ Yatsenko D, Nguyen T, Shen S, Gunalan K, Turner CA, Guzman R, Sasaki M, Sitonic D, Reimer J, Walker EY, Tolias AS. DataJoint Elements: Data Workflows for Neurophysiology. bioRxiv. 2021 Jan 1. doi: https://doi.org/10.1101/2021.03.30.437358

+ DataJoint Elements ([RRID:SCR_021894](https://scicrunch.org/resolver/SCR_021894)) - Element Miniscope (version `<Enter version number>`)
+ Yatsenko D, Nguyen T, Shen S, Gunalan K, Turner CA, Guzman R, Sasaki M, Sitonic D,
Reimer J, Walker EY, Tolias AS. DataJoint Elements: Data Workflows for
Neurophysiology. bioRxiv. 2021 Jan 1. doi:
https://doi.org/10.1101/2021.03.30.437358

+ DataJoint Elements (
[RRID:SCR_021894](https://scicrunch.org/resolver/SCR_021894)) - Element Miniscope
(version `<Enter version number>`)
218 changes: 215 additions & 3 deletions tests/__init__.py
@@ -2,11 +2,9 @@

import os
import pytest
# import pandas as pd
import pathlib
import datajoint as dj
# import importlib
# import numpy as np
import numpy as np
import sys

# from workflow_miniscope.paths import get_miniscope_root_data_dir
@@ -19,9 +17,22 @@
test_user_data_dir = pathlib.Path('./tests/user_data')
test_user_data_dir.mkdir(exist_ok=True)

sessions_dirs = ['subject1/session1']

# ------------------ GENERAL FUNCTIONS ------------------


def write_csv(content, path):
    """
    General function for writing strings as lines of a CSV file
    :param content: list of strings, each written as one row of the CSV
    :param path: pathlib.Path of the output file
    """
    with open(path, 'w') as f:
        for line in content:
            f.write(line + '\n')


class QuietStdOut:
"""If verbose set to false, used to quiet tear_down table.delete prints"""
def __enter__(self):
@@ -49,6 +60,39 @@ def dj_config():
return


@pytest.fixture(autouse=True)
def test_data(dj_config):
test_data_dir = pathlib.Path(dj.config['custom']['miniscope_root_data_dir'])

test_data_exists = np.all([(test_data_dir / p).exists() for p in sessions_dirs])

if not test_data_exists:
try:
dj.config['custom'].update({
'djarchive.client.endpoint': os.environ['DJARCHIVE_CLIENT_ENDPOINT'],
'djarchive.client.bucket': os.environ['DJARCHIVE_CLIENT_BUCKET'],
'djarchive.client.access_key': os.environ['DJARCHIVE_CLIENT_ACCESSKEY'],
'djarchive.client.secret_key': os.environ['DJARCHIVE_CLIENT_SECRETKEY']
})
except KeyError as e:
raise FileNotFoundError(
f'Test data not available at {test_data_dir}.'
f'\nAttempting to download from DJArchive,'
f' but no credentials found in environment variables.'
f'\nError: {str(e)}')

import djarchive_client
from workflow_miniscope import version

client = djarchive_client.client()
workflow_version = version.__version__

client.download('workflow-miniscope-test-set',
workflow_version.replace('.', '_'),
str(test_data_dir), create_target=False)
return


@pytest.fixture
def pipeline():
""" Loads workflow_miniscope.pipeline lab, session, subject, miniscope"""
@@ -73,3 +117,171 @@ def pipeline():
pipeline.subject.Subject.delete()
pipeline.session.Session.delete()
pipeline.lab.Lab.delete()


@pytest.fixture
def subjects_csv():
""" Create a 'subjects.csv' file"""
subject_content = ["subject,sex,subject_birth_date,subject_description",
"subject1,M,2021-01-01 00:00:01,Theo"]
subject_csv_path = pathlib.Path('./tests/user_data/subjects.csv')
write_csv(subject_content, subject_csv_path)

yield subject_content, subject_csv_path
if _tear_down:
subject_csv_path.unlink()


@pytest.fixture
def ingest_subjects(pipeline, subjects_csv):
from workflow_miniscope.ingest import ingest_subjects
_, subjects_csv_path = subjects_csv
ingest_subjects(subjects_csv_path)
return


@pytest.fixture
def sessions_csv(test_data):
""" Create a 'sessions.csv' file"""
session_csv_path = pathlib.Path('./tests/user_data/sessions.csv')
session_content = ["subject,session_dir,acquisition_software",
"subject1,subject1/session1,Miniscope-DAQ-V4"]
write_csv(session_content, session_csv_path)

yield session_content, session_csv_path
if _tear_down:
session_csv_path.unlink()


@pytest.fixture
def ingest_sessions(ingest_subjects, sessions_csv):
from workflow_miniscope.ingest import ingest_sessions
_, sessions_csv_path = sessions_csv
ingest_sessions(sessions_csv_path)
return


@pytest.fixture
def testdata_paths():
return {
'caiman_2d': 'subject1/session1/caiman'
}


@pytest.fixture
def caiman2D_paramset(pipeline):
miniscope = pipeline['miniscope']

params_caiman_2d = dict(decay_time=0.4,
pw_rigid=False,
max_shifts=(5, 5),
gSig_filt=(3, 3),
strides=(48, 48),
overlaps=(24, 24),
max_deviation_rigid=3,
border_nan='copy',
method_init='corr_pnr',
K=None,
gSig=(3, 3),
gSiz=(13, 13),
merge_thr=0.7,
p=1,
tsub=2,
ssub=1,
rf=40,
stride=20,
only_init=True,
nb=0,
nb_patch=0,
method_deconvolution='oasis',
low_rank_background=None,
update_background_components=True,
min_corr=0.8,
min_pnr=10,
normalize_init=False,
center_psf=True,
ssub_B=2,
ring_size_factor=1.4,
del_duplicates=True,
border_pix=0,
min_SNR=3,
rval_thr=0.85,
use_cnn=False)

miniscope.ProcessingParamSet.insert_new_params(
processing_method='caiman',
paramset_id=0,
paramset_desc='Calcium imaging analysis with CaImAn using default parameters',
params=params_caiman_2d)

yield params_caiman_2d

if _tear_down:
(miniscope.ProcessingParamSet & 'paramset_id = 0').delete()


@pytest.fixture
def recording_info(pipeline, ingest_sessions):
miniscope = pipeline['miniscope']

miniscope.RecordingInfo.populate()

yield

if _tear_down:
miniscope.RecordingInfo.delete()


@pytest.fixture
def processing_tasks(pipeline, caiman2D_paramset, recording_info):
miniscope = pipeline['miniscope']
session = pipeline['session']
get_miniscope_root_data_dir = pipeline['get_miniscope_root_data_dir']
root_dir = pathlib.Path(get_miniscope_root_data_dir())

    for scan_key in (session.Session & miniscope.RecordingInfo
                     - miniscope.ProcessingTask).fetch('KEY'):
        scan_file = root_dir / (miniscope.RecordingInfo.File
                                & scan_key).fetch('file_path')[0]
        recording_dir = scan_file.parent
        caiman_dir = recording_dir / 'caiman'
        if caiman_dir.exists():
            # paramset_id=0 matches the set inserted by the caiman2D_paramset fixture
            miniscope.ProcessingTask.insert1({**scan_key,
                                              'paramset_id': 0,
                                              'processing_output_dir': caiman_dir.as_posix()})

yield

if _tear_down:
miniscope.ProcessingTask.delete()


@pytest.fixture
def processing(processing_tasks, pipeline):
miniscope = pipeline['miniscope']

errors = miniscope.Processing.populate(suppress_errors=True)

if errors:
print(f'Populate ERROR: {len(errors)} errors in '
+ f'"miniscope.Processing.populate()" - {errors[0][-1]}')

yield

if _tear_down:
miniscope.Processing.delete()


@pytest.fixture
def curations(processing, pipeline):
miniscope = pipeline['miniscope']

for key in (miniscope.Processing - miniscope.Curation).fetch('KEY'):
miniscope.Curation().create1_from_processing_task(key)

yield

if _tear_down:
miniscope.Curation.delete()
29 changes: 9 additions & 20 deletions tests/test_ingest.py
@@ -14,40 +14,29 @@

def test_ingest_subjects(pipeline, ingest_subjects):
subject = pipeline['subject']
assert len(subject.Subject()) == 3
assert len(subject.Subject()) == 1


def test_ingest_sessions(pipeline, sessions_csv, ingest_sessions):
scan = pipeline['scan']
session = pipeline['session']
get_imaging_root_data_dir = pipeline['get_imaging_root_data_dir']
get_miniscope_root_data_dir = pipeline['get_miniscope_root_data_dir']

assert len(session.Session()) == 4
assert len(scan.Scan()) == 4
assert len(session.Session()) == 1

sessions, _ = sessions_csv
sess = sessions.iloc[3]
sess_dir = pathlib.Path(sess.session_dir).relative_to(get_imaging_root_data_dir())
sess_dir = pathlib.Path(sess.session_dir).relative_to(get_miniscope_root_data_dir())
assert (session.SessionDirectory
& {'subject': sess.name}).fetch1('session_dir') == sess_dir.as_posix()


def test_paramset_insert(caiman2D_paramset, caiman3D_paramset, pipeline):
imaging = pipeline['imaging']
from element_calcium_imaging.imaging import dict_to_uuid
def test_paramset_insert(caiman2D_paramset, pipeline):
miniscope = pipeline['miniscope']
from element_interface.utils import dict_to_uuid

method, desc, paramset_hash = (imaging.ProcessingParamSet & {'paramset_idx': 1}
    method, desc, paramset_hash = (miniscope.ProcessingParamSet & {'paramset_id': 0}
).fetch1('processing_method', 'paramset_desc',
'param_set_hash')
assert method == 'caiman'
assert desc == 'Calcium imaging analysis' \
' with CaImAn using default CaImAn parameters for 2d planar images'
assert desc == 'Calcium imaging analysis with CaImAn using default parameters'
assert dict_to_uuid(caiman2D_paramset) == paramset_hash

method, desc, paramset_hash = (imaging.ProcessingParamSet & {'paramset_idx': 2}
).fetch1('processing_method', 'paramset_desc',
'param_set_hash')
assert method == 'caiman'
assert desc == 'Calcium imaging analysis' \
' with CaImAn w/default CaImAn parameters for 3d volumetric images'
assert dict_to_uuid(caiman3D_paramset) == paramset_hash
2 changes: 1 addition & 1 deletion user_data/subjects.csv
@@ -1,2 +1,2 @@
subject,sex,subject_birth_date,subject_description
subject1,M,2021-01-01 00:00:01,
subject1,M,2021-01-01 00:00:01,Theo