Merged
4 changes: 2 additions & 2 deletions .azurePipeline/k8s_test_job.yaml.tmpl
@@ -25,12 +25,12 @@ spec:
         claimName: $PVC
   containers:
     - name: entitytester
-      image: $IMAGE_NAME_WITH_TAG
+      image: $TEST_E2E_IMAGE_NAME_WITH_TAG
Contributor:
What does E2E stand for? End to end? From which end to which end?

Collaborator Author:
Yup, end to end. Here it means tests that only use the public REST API. We could expand further, or add another level to test the integration between client apps and the server.
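
To make that concrete, here is a minimal, hypothetical sketch of such an e2e test (illustrative only, not code from this PR). It talks to the service purely through the public REST API, relying on the `SERVER` environment variable and the `/api/v1/status` endpoint that the k8s test job template sets up:

```python
import os

import requests

# SERVER is injected by the k8s test job (env: SERVER=http://$SERVICE);
# the exact response body of /api/v1/status is an assumption here.
SERVER = os.environ['SERVER']


def test_status_endpoint_is_reachable():
    # Same public endpoint that dockerize waits on before the suite starts.
    response = requests.get(f'{SERVER}/api/v1/status')
    assert response.status_code == 200
```

A fuller suite would exercise project creation, uploads and results through the same public endpoints, as the fixtures moved out of entityservice/tests do.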

       imagePullPolicy: Always
       env:
         - name: SERVER
           value: http://$SERVICE
-      command: ["dockerize", "-wait", "http://$SERVICE/api/v1/status", "-timeout", "5m", "python", "-m", "pytest", "-n", "4", "entityservice/tests", "-x", "--junit-xml=/mnt/results.xml"]
+      command: ["dockerize", "-wait", "http://$SERVICE/api/v1/status", "-timeout", "5m", "python", "-m", "pytest", "-n", "4", "e2etests/tests", "-x", "--junit-xml=/mnt/results.xml"]
       volumeMounts:
         - mountPath: /mnt
           name: results
24 changes: 23 additions & 1 deletion azure-pipelines.yml
@@ -21,6 +21,7 @@

 variables:
   backendImageName: data61/anonlink-app
+  testE2EImageName: data61/anonlink-test
   frontendImageName: data61/anonlink-nginx
   tutorialImageName: data61/anonlink-docs-tutorials
   benchmarkingImageName: data61/anonlink-benchmark
@@ -89,6 +90,26 @@ stages:
       imageName: data61/anonlink-app
       dockerBuildVersion: "$[dependencies.HashBaseDependencies.outputs['SetDockerBaseTag.DOCKER_BASE_TAG']]"

+- stage: stage_docker_e2e_test_image_build
+  displayName: Build E2E Test Docker image
+  dependsOn: [stage_base_docker_image_build]
+  jobs:
+  # Why do we recompute the base hash? Because we can't pass variables between stages.
+  # https://github.com/microsoft/azure-pipelines-tasks/issues/4743
+  - job: HashBaseDependencies
+    displayName: Hash Dependencies
+    pool:
+      vmImage: 'ubuntu-latest'
+    steps:
+    - template: .azurePipeline/templateSetVariableDockerBaseTag.yml
+  - template: .azurePipeline/templateDockerBuildPush.yml
+    parameters:
+      folder: './e2etests'
+      jobName: 'anonlink_e2e_test'
+      dependsOn: HashBaseDependencies
+      imageName: data61/anonlink-test
+      dockerBuildVersion: "$[dependencies.HashBaseDependencies.outputs['SetDockerBaseTag.DOCKER_BASE_TAG']]"
+
 - stage: stage_docker_nginx_image_build
   displayName: Nginx Docker build
   dependsOn: []
@@ -219,6 +240,7 @@ stages:
echo "##vso[task.setvariable variable=PVC]$(DEPLOYMENT)-test-results"
echo "##vso[task.setvariable variable=SERVICE]$(DEPLOYMENT)-entity-service-server"
echo $(backendImageName):$(DOCKER_TAG) | xargs -I@ echo "##vso[task.setvariable variable=IMAGE_NAME_WITH_TAG]@"
echo $(testE2EImageName):$(DOCKER_TAG) | xargs -I@ echo "##vso[task.setvariable variable=TEST_E2E_IMAGE_NAME_WITH_TAG]@"
echo $(DEPLOYMENT)-tmppod | xargs -I@ echo "##vso[task.setvariable variable=POD_NAME]@"
displayName: 'Set variables for service, test result volume and pod'
- task: Kubernetes@1
@@ -304,7 +326,7 @@ stages:
             cat .azurePipeline/k8s_test_job.yaml.tmpl | \
               sed 's|\$PVC'"|$(PVC)|g" | \
               sed 's|\$DEPLOYMENT_NAME'"|$(DEPLOYMENT)|g" | \
-              sed 's|\$IMAGE_NAME_WITH_TAG'"|$(IMAGE_NAME_WITH_TAG)|g" | \
+              sed 's|\$TEST_E2E_IMAGE_NAME_WITH_TAG'"|$(TEST_E2E_IMAGE_NAME_WITH_TAG)|g" | \
               sed 's|\$SERVICE'"|$(SERVICE)|g" > $(Build.ArtifactStagingDirectory)/k8s_test_job.yaml
           displayName: 'Prepare integration test job from template'

25 changes: 0 additions & 25 deletions backend/entityservice/tests/config.py

This file was deleted.

183 changes: 0 additions & 183 deletions backend/entityservice/tests/conftest.py
@@ -1,183 +0,0 @@
import os
import time
import pytest
import requests as requests_library
import itertools

from entityservice.tests.util import create_project_upload_fake_data, delete_project, create_project_no_data

THROTTLE_SLEEP = 0.2


@pytest.fixture(scope='session')
def requests():
    """
    We inject the requests session.
    For now we just add a small sleep after every request to ensure we don't get throttled when
    tests run back to back. Note the rate limit in nginx is 10 requests per ip per second.
    """
    def delay_next(r, *args, **kwargs):
        time.sleep(THROTTLE_SLEEP)

    testing_session = requests_library.Session()
    testing_session.hooks['response'].append(delay_next)
    yield testing_session


# Parameterising on:
#
# - pairs of dataset sizes
# - overlap of the sizes
# - result_type for 2 parties in ['similarity_scores', 'permutations'] and for more parties in ['groups']
# - threshold

ENVVAR_NAME = 'ENTITY_SERVICE_RUN_SLOW_TESTS'
THRESHOLDS = [0.9, 1.0]
OVERLAPS = [0.0, 0.9]
ENCODING_SIZES = [8]
NUMBERS_PARTIES = [2, 3, 5]

if os.getenv(ENVVAR_NAME):
    ENCODING_SIZES.extend([64, 128, 512, 2048])
    OVERLAPS.extend([0.2, 0.5, 1.0])
    THRESHOLDS.extend([0.6, 0.8, 0.95])

FAST_SIZES_2P = tuple(itertools.product([1, 1000], repeat=2))
FAST_SIZES_NP = tuple(itertools.chain(
    FAST_SIZES_2P,
    [(1, 1000, 1000),
     (1000, 1, 1000),
     (1000, 1000, 1),
     (1000, 1000, 1000),
     (1000, 1000, 1000, 1000, 1000)]))

SLOW_SIZES_2P = tuple(itertools.combinations([1, 10000, 100000, 1000000], 2))
SLOW_SIZES_NP = tuple(itertools.chain(
    SLOW_SIZES_2P,
    itertools.product(
        [10000, 100000], [10000, 100000], [100000, 1000000]),
    ((10000, 10000, 100000, 100000, 1000000),)))

SIZES_2P = (tuple(itertools.chain(FAST_SIZES_2P, SLOW_SIZES_2P))
            if os.getenv(ENVVAR_NAME)
            else FAST_SIZES_2P)
SIZES_NP = (tuple(itertools.chain(FAST_SIZES_NP, SLOW_SIZES_NP))
            if os.getenv(ENVVAR_NAME)
            else FAST_SIZES_NP)

PROJECT_PARAMS_2P = tuple(
    itertools.product(SIZES_2P, OVERLAPS, ENCODING_SIZES))
PROJECT_PARAMS_NP = tuple(
    itertools.product(SIZES_NP, OVERLAPS, ENCODING_SIZES))
PROJECT_RESULT_TYPES_2P = ['similarity_scores', 'permutations']
PROJECT_RESULT_TYPES_NP = ['groups']


def create_project_response(requests, size, overlap, result_type, encoding_size=128):
    """
    Create a project with the given size, overlap and result_type.

    Tests that use one of these projects will get a dict like the following:

    {
        "project_id": "ID",
        "upload-mode": "BINARY" | "JSON",
        "size": [size 1, size 2],
        "encoding-size": int number of bytes in each encoding e.g. 128,
        "overlap": float between 0 and 1,
        "result_token": "TOKEN",
        "upload_tokens": [TOKENS, ...],
        "dp_1": <JSON RESPONSE TO DATA UPLOAD>
        "dp_2": <JSON RESPONSE TO DATA UPLOAD>
    }
    """
    project, dp_responses = create_project_upload_fake_data(
        requests, size, overlap=overlap, result_type=result_type, encoding_size=encoding_size)
    project.update({
        'size': size,
        'encoding-size': encoding_size,
        'upload-mode': 'JSON',
        'overlap': overlap,
        'dp_responses': dp_responses
    })
    return project


@pytest.fixture(scope='function', params=PROJECT_PARAMS_2P)
def similarity_scores_project(request, requests):
    size, overlap, encoding_size = request.param
    prj = create_project_response(requests, size, overlap, 'similarity_scores', encoding_size)
    yield prj
    delete_project(requests, prj)


@pytest.fixture(scope='function', params=tuple(itertools.chain(
    [(t, 2) for t in PROJECT_RESULT_TYPES_2P],
    [(t, n) for t in PROJECT_RESULT_TYPES_NP for n in NUMBERS_PARTIES])))
def result_type_number_parties(request):
    yield request.param


@pytest.fixture(params=(
    *[(t, n) for t in PROJECT_RESULT_TYPES_2P
      for n in (None, 2)],
    *[(t, n) for t in PROJECT_RESULT_TYPES_NP
      for n in (None, *NUMBERS_PARTIES)]))
def result_type_number_parties_or_none(request):
    yield request.param


@pytest.fixture
def valid_project_params(request, result_type_number_parties_or_none):
    result_type, number_parties_or_none = result_type_number_parties_or_none
    # None is what we use to test handling of default values
    params_dict = {'result_type': result_type}
    if number_parties_or_none is not None:
        params_dict['number_parties'] = number_parties_or_none
    return params_dict


@pytest.fixture(scope='function')
def project(request, requests, result_type_number_parties):
    result_type, number_parties = result_type_number_parties
    project = create_project_no_data(
        requests,
        result_type=result_type,
        number_parties=number_parties)
    yield project
    # Release project resource
    delete_project(requests, project)


@pytest.fixture(scope='function', params=ENCODING_SIZES)
def encoding_size(request):
    yield request.param


@pytest.fixture(scope='function', params=THRESHOLDS)
def threshold(request):
    yield request.param


@pytest.fixture(scope='function', params=PROJECT_PARAMS_2P)
def permutations_project(request, requests):
    size, overlap, encoding_size = request.param
    prj = create_project_response(requests, size, overlap, 'permutations', encoding_size)
    yield prj
    delete_project(requests, prj)


@pytest.fixture(scope='function', params=PROJECT_PARAMS_NP)
def groups_project(request, requests):
    size, overlap, encoding_size = request.param
    prj = create_project_response(requests, size, overlap, 'groups', encoding_size)
    yield prj
    delete_project(requests, prj)


@pytest.fixture(
    params=itertools.chain(
        itertools.product(PROJECT_RESULT_TYPES_2P, [1, 3, 4, 5]),
        [(t, 1) for t in PROJECT_RESULT_TYPES_NP]))
def invalid_result_type_number_parties(request):
    yield request.param
68 changes: 0 additions & 68 deletions backend/entityservice/tests/generate_test_data.py

This file was deleted.

27 changes: 11 additions & 16 deletions backend/entityservice/tests/test_serialization.py
@@ -35,21 +35,24 @@ def test_generate_scores_produces_json(self):
             (array('I', [1, 2, 5]), array('I', [2, 2, 5]))
         )

+        json_obj = self._serialize_and_load_scores(sims_iter)
+        assert len(json_obj["similarity_scores"]) == 3
+        for pair_and_score in json_obj["similarity_scores"]:
+            self.assertEqual(len(pair_and_score), 3)
+            a, b, score = pair_and_score
+            self.assertEqual(len(a), 2)
+            self.assertEqual(len(b), 2)
+
+    def _serialize_and_load_scores(self, sims_iter):
         buffer = io.BytesIO()
         anonlink.serialization.dump_candidate_pairs(sims_iter, buffer)
         buffer.seek(0)
         json_iterator = generate_scores(buffer)

         # Consume the whole iterator and ensure it is valid json
         json_str = ''.join(json_iterator)
         json_obj = json.loads(json_str)
         self.assertIn('similarity_scores', json_obj)
-        assert len(json_obj["similarity_scores"]) == 3
-        for pair_and_score in json_obj["similarity_scores"]:
-            self.assertEqual(len(pair_and_score), 3)
-            a, b, score = pair_and_score
-            self.assertEqual(len(a), 2)
-            self.assertEqual(len(b), 2)
+        return json_obj

     def test_sims_to_json_empty(self):
         sims_iter = (
@@ -58,15 +61,7 @@ def test_sims_to_json_empty(self):
             (array('I', []), array('I', []))
         )

-        buffer = io.BytesIO()
-        anonlink.serialization.dump_candidate_pairs(sims_iter, buffer)
-        buffer.seek(0)
-        json_iterator = generate_scores(buffer)
-
-        # Consume the whole iterator and ensure it is valid json
-        json_str = ''.join(json_iterator)
-        json_obj = json.loads(json_str)
-        self.assertIn('similarity_scores', json_obj)
+        json_obj = self._serialize_and_load_scores(sims_iter)
         assert len(json_obj["similarity_scores"]) == 0

     def test_binary_pack_filters(self):