8 changes: 4 additions & 4 deletions .azurePipeline/runDockerComposeTests.sh
@@ -3,7 +3,7 @@
SCRIPT_NAME=$(basename "$0")
TAG="latest"
RESULT_FILE="testResults.xml"
TYPE="test"
TYPE="integrationtests"
NO_ANSI=FALSE

help() {
@@ -17,7 +17,7 @@ The result is an xml file for the type 'tests' and 'tutorials', and a JSON file
-p Project name (used by docker-compose with '-p'). REQUIRED.
-o Output file where to store the results. [$RESULT_FILE]
-t Docker tag used for the different images. the same tag is used for all of them. [$TAG]
--type Type of tests to run. Either 'tests', 'benchmark' or 'tutorials'. [$TYPE]
--type Type of tests to run. Either 'integrationtests', 'benchmark' or 'tutorials'. [$TYPE]
--no-ansi Do not print ANSI control characters. Preferable when running on the CI.

-h | --help Print this message
@@ -53,7 +53,7 @@ echoerr () {

process_args "$@"
[[ -z $PROJECT_NAME ]] && echoerr "ERROR: Missing project name. Use option '-p'" && exit 1
[[ $TYPE != "tests" && $TYPE != "benchmark" && $TYPE != "tutorials" ]] && echoerr "ERROR: Unrecognized type '$TYPE'. Should be equal to 'tests', 'benchmark' or 'tutorials'" && exit 1
[[ $TYPE != "integrationtests" && $TYPE != "benchmark" && $TYPE != "tutorials" ]] && echoerr "ERROR: Unrecognized type '$TYPE'. Should be equal to 'integrationtests', 'benchmark' or 'tutorials'" && exit 1
export TAG=$TAG

commandPrefix="docker-compose -f tools/docker-compose.yml -f tools/ci.yml --project-directory . "
@@ -64,7 +64,7 @@ fi
echo "Initialise the database"
$commandPrefix -p $PROJECT_NAME up db_init > /dev/null 2>&1

if [[ $TYPE == "tests" ]]; then
if [[ $TYPE == "integrationtests" ]]; then
CREATED_RESULT_FILE="/var/www/testResults.xml"
elif [[ $TYPE == "benchmark" ]]; then
CREATED_RESULT_FILE="/app/results.json"
25 changes: 25 additions & 0 deletions azure-pipelines.yml
@@ -120,6 +120,31 @@ stages:
imageName: data61/anonlink-docs-tutorials
jobName: 'anonlink_docs_tutorials'

- stage: stage_integration_tests
displayName: Integration Tests
dependsOn:
- stage_docker_image_build
jobs:
- job: Integration
timeoutInMinutes: 15
variables:
resultFile: testResults.xml
displayName: Integration Tests
pool:
vmImage: 'ubuntu-latest'
steps:
- template: .azurePipeline/templateSetVariableDockerTag.yml
- script: |
./.azurePipeline/runDockerComposeTests.sh --no-ansi -p es$(DOCKER_TAG)$(Build.SourceVersion) -t $(DOCKER_TAG) -o $(resultFile) --type integrationtests
displayName: 'Start docker compose integration tests'
- task: PublishTestResults@2
condition: succeededOrFailed()
inputs:
testResultsFormat: 'JUnit'
testResultsFiles: '$(resultFile)'
testRunTitle: 'Publish integration test results'
failTaskOnFailedTests: true

- stage: stage_benchmark
displayName: Benchmark
dependsOn:
6 changes: 4 additions & 2 deletions backend/entityservice/cache/progress.py
@@ -1,13 +1,15 @@
import structlog

from entityservice.settings import Config as config
from entityservice.settings import Config as globalconfig
from entityservice.cache.connection import connect_to_redis
from entityservice.cache.helpers import _get_run_hash_key

logger = structlog.get_logger()


def save_current_progress(comparisons, run_id):
def save_current_progress(comparisons, run_id, config=None):
Collaborator
I find it confusing to be able to pass config in here, as almost all of it gets ignored anyway. It'd be clearer if it's just a cache expiry value. But then, why would you want to set a different expiry value in the first place?

Collaborator Author
Yeah you'd only want to do that for testing. My logic is that it would be nice to slowly move away from using the global Config as we add tests.

if config is None:
config = globalconfig
logger.debug(f"Updating progress. Compared {comparisons} CLKS", run_id=run_id)
r = connect_to_redis()
key = _get_run_hash_key(run_id)
Empty file.
Empty file.
99 changes: 99 additions & 0 deletions backend/entityservice/integrationtests/dbtests/test_insertions.py
@@ -0,0 +1,99 @@
import datetime
import time

import psycopg2
from pytest import raises

from entityservice.database import insert_dataprovider, insert_new_project, \
insert_encodings_into_blocks, insert_blocking_metadata, get_project, get_encodingblock_ids
from entityservice.models import Project
from entityservice.tests.util import generate_bytes
from entityservice.utils import generate_code
from entityservice.settings import Config as config


class TestInsertions:

def _get_conn_and_cursor(self):
db = config.DATABASE
host = config.DATABASE_SERVER
user = config.DATABASE_USER
password = config.DATABASE_PASSWORD
conn = psycopg2.connect(host=host, dbname=db, user=user, password=password)
cursor = conn.cursor()
return conn, cursor

def _create_project_and_dp(self):
project, dp_ids = self._create_project()
dp_id = dp_ids[0]
dp_auth_token = project.update_tokens[0]

conn, cur = self._get_conn_and_cursor()
# create a default block
insert_blocking_metadata(conn, dp_id, {'1': 99})
conn.commit()

assert len(dp_auth_token) == 48
return project.project_id, project.result_token, dp_id, dp_auth_token

def _create_project(self):
project = Project('groups', {}, name='', notes='', parties=2, uses_blocking=False)
conn, cur = self._get_conn_and_cursor()
dp_ids = project.save(conn)
return project, dp_ids

def test_insert_project(self):
before = datetime.datetime.now()
project, _ = self._create_project()
assert len(project.result_token) == 48
# check we can fetch the inserted project back from the database
conn, cur = self._get_conn_and_cursor()
project_response = get_project(conn, project.project_id)
assert 'time_added' in project_response
assert project_response['time_added'] - before >= datetime.timedelta(seconds=0)
assert not project_response['marked_for_deletion']
assert not project_response['uses_blocking']
assert project_response['parties'] == 2
assert project_response['notes'] == ''
assert project_response['name'] == ''
assert project_response['result_type'] == 'groups'
assert project_response['schema'] == {}
assert project_response['encoding_size'] is None

def test_insert_dp_no_project_fails(self):
conn, cur = self._get_conn_and_cursor()
project_id = generate_code()
dp_auth = generate_code()
with raises(psycopg2.errors.ForeignKeyViolation):
insert_dataprovider(cur, auth_token=dp_auth, project_id=project_id)

def test_insert_many_clks(self):
data = [generate_bytes(128) for _ in range(100)]
project_id, project_auth_token, dp_id, dp_auth_token = self._create_project_and_dp()
conn, cur = self._get_conn_and_cursor()
num_entities = 10_000
blocks = [['1'] for _ in range(num_entities)]
encodings = [data[i % 100] for i in range(num_entities)]
start_time = time.perf_counter()
insert_encodings_into_blocks(conn, dp_id,
block_ids=blocks,
encoding_ids=list(range(num_entities)),
encodings=encodings
)
end_time = time.perf_counter()
elapsed_time = end_time - start_time
# This takes ~0.5s using docker compose on a ~5yo desktop.
# If the database is busy - e.g. if you're running integration
# tests and e2e tests at the same time, this assertion could fail.
assert elapsed_time < 2

stored_encoding_ids = list(get_encodingblock_ids(conn, dp_id, '1'))
fetch_time = time.perf_counter() - end_time
# retrieval of encoding ids should be much faster than insertion
assert fetch_time < elapsed_time

assert len(stored_encoding_ids) == num_entities
for stored_encoding_id, original in zip(stored_encoding_ids, range(num_entities)):
assert stored_encoding_id == original

# TODO fetch binary encodings and verify against uploaded
Empty file.
53 changes: 53 additions & 0 deletions backend/entityservice/integrationtests/redistests/test_progress.py
@@ -0,0 +1,53 @@
import datetime
import time

import pytest
import redis

from entityservice.settings import Config as config
from entityservice.cache import connect_to_redis, clear_progress, save_current_progress, get_progress


class TestProgress:

def _get_redis_rw(self):
return connect_to_redis()

def test_clear_missing_progress(self):
clear_progress('test_clear_missing_progress')

def test_clear_progress(self):
config.CACHE_EXPIRY = datetime.timedelta(seconds=1)
runid = 'runtest_clear_progress'
save_current_progress(1, runid, config)
assert 1 == get_progress(runid)
clear_progress(runid)
assert get_progress(runid) is None

def test_storing_wrong_type(self):
config.CACHE_EXPIRY = datetime.timedelta(seconds=1)
runid = 'test_storing_wrong_type'
with pytest.raises(redis.exceptions.ResponseError):
save_current_progress(1.5, runid, config)

def test_progress_expires(self):
# Uses the minimum expiry of 1 second
config.CACHE_EXPIRY = datetime.timedelta(seconds=1)
runid = 'test_progress_expires'
save_current_progress(42, runid, config)
cached_progress = get_progress(runid)
assert cached_progress == 42
time.sleep(1)
# After expiry the progress should be reset to None
assert get_progress(runid) is None

def test_progress_increments(self):
config.CACHE_EXPIRY = datetime.timedelta(seconds=1)
runid = 'test_progress_increments'
save_current_progress(1, runid, config)
cached_progress = get_progress(runid)
assert cached_progress == 1
for i in range(99):
save_current_progress(1, runid, config)

assert 100 == get_progress(runid)
33 changes: 33 additions & 0 deletions backend/entityservice/integrationtests/redistests/test_status.py
@@ -0,0 +1,33 @@
import time

import redis
from pytest import raises

from entityservice.cache import connect_to_redis, get_status, set_status


class TestStatus:

def _get_redis_rw(self):
return connect_to_redis()

def test_get_missing_status(self):
r = self._get_redis_rw()
r.delete('entityservice-status')
status = get_status()
assert status is None


def test_set_status(self):
original_status = get_status()
if original_status is None:
new_status = {}
else:
new_status = original_status

new_status['testkey'] = 'testvalue'
set_status(new_status)
time.sleep(0.2)
updated_status = get_status()
assert 'testkey' in updated_status

3 changes: 3 additions & 0 deletions backend/entityservice/models/project.py
@@ -86,11 +86,14 @@ def save(self, conn):
logger.debug("New project created in DB")
logger.debug("Creating new data provider entries")

dp_ids = []
for auth_token in self.update_tokens:
dp_id = db.insert_dataprovider(cur, auth_token, project_id)
dp_ids.append(dp_id)
logger.debug("Added a dataprovider to db", dp_id=dp_id)

logger.debug("Added data providers")

logger.debug("Committing transaction")
conn.commit()
return dp_ids
19 changes: 8 additions & 11 deletions docs/development.rst
@@ -105,28 +105,25 @@
Testing Local Deployment
~~~~~~~~~~~~~~~~~~~~~~~~

The docker compose file ``tools/ci.yml`` is deployed along with ``tools/docker-compose.yml``. This simply defines an
additional container (from the same backend image) which runs the integration tests after a short delay.

The logs from the various containers (nginx, backend, worker, database) are all collected, archived and are made
available in the Jenkins UI for introspection.
The docker compose file ``tools/ci.yml`` is deployed along with ``tools/docker-compose.yml``. This compose file
defines additional containers which run benchmarks and tests after a short delay.
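For example, the integration tests can be run against a local deployment with the helper script (a minimal
sketch; it assumes the images referenced by the compose files have already been built and tagged locally)::

    # Bring up a local deployment and run the integration test container,
    # writing a JUnit XML report to testResults.xml.
    ./.azurePipeline/runDockerComposeTests.sh -p localtest -t latest -o testResults.xml --type integrationtests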


Testing K8s Deployment
~~~~~~~~~~~~~~~~~~~~~~

The kubernetes deployment uses ``helm`` with the template found in ``deployment/entity-service``. Jenkins additionally
defines the docker image versions to use and ensures an ingress is not provisioned. The deployment is configured to be
quite conservative in terms of cluster resources. Currently this logic all resides in ``Jenkinsfile.groovy``.
quite conservative in terms of cluster resources.

The k8s deployment test is limited to 30 minutes and an effort is made to clean up all created resources.

After a few minutes waiting for the deployment a `Kubernetes Job <https://kubernetes.io/docs/concepts/workloads/controllers/jobs-run-to-completion/>`__ is created using ``kubectl create``.
After waiting a few minutes for the deployment, a
`Kubernetes Job <https://kubernetes.io/docs/concepts/workloads/controllers/jobs-run-to-completion/>`__ is created using
``kubectl create``.

This job includes a ``1GiB`` `persistent volume claim <https://kubernetes.io/docs/concepts/storage/persistent-volumes/>`__
to which the results are written (as ``results.xml``). During the testing the pytest output will be rendered in jenkins,
to which the results are written (as ``results.xml``). During testing, the pytest output is rendered,
and then the Job's pod terminates. We create a temporary pod which mounts the same results volume and then we copy
across the produced artifact for rendering in Jenkins. This dance is only necessary to retrieve files from the cluster
to our Jenkins instance, it would be straightforward if we only wanted the stdout from each pod/job.

across the produced test result artifact.
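The copy step amounts to something like the following (a sketch only; the pod name and the mount path of the
results volume are illustrative, not taken from the deployment templates)::

    # Copy the JUnit results file off the temporary pod that mounts the results volume.
    kubectl cp temporary-results-pod:/mnt/results.xml ./results.xml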

5 changes: 3 additions & 2 deletions docs/devops.rst
@@ -34,8 +34,9 @@ The continuous integration stages are:
- the benchmark ``data61/anonlink-benchmark`` (used to run the benchmark)
- runs the benchmark using ``docker-compose`` and publishes the results as an artifact in Azure
- runs the tutorial tests using ``docker-compose`` and publishes the results in Azure
- runs the integration tests by deploying the whole service on ``Kubernetes``, running the integration
tests and publishing the results in Azure. The pod logs are also available in Azure DevOps.
- runs the end-to-end tests by deploying the whole service on ``Kubernetes``, running the
tests found in ``backend/entityservice/tests`` and publishing the results in Azure. The pod logs
are also available in Azure DevOps.

The build pipeline is triggered for every push on every branch. It is not triggered by Pull
Requests to avoid duplicate testing and building potentially untrusted external code.
15 changes: 14 additions & 1 deletion tools/ci.yml
@@ -13,7 +13,20 @@ services:
- redis
- worker
- nginx


integrationtests:
image: data61/anonlink-app:${TAG:-latest}
environment:
- SERVER=http://nginx:8851
- INITIAL_DELAY=20
- DATABASE_PASSWORD=rX%QpV7Xgyrz
command: /bin/sh -c "dockerize -wait tcp://nginx:8851/v1/status -timeout 1m python -m pytest -n 1 entityservice/integrationtests --junitxml=testResults.xml -x"
depends_on:
- db
- backend
- worker
- nginx

benchmark:
image: data61/anonlink-benchmark:${TAG:-latest}
environment:
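For local debugging, the same integrationtests service can be brought up directly with docker-compose,
mirroring what runDockerComposeTests.sh does (a sketch; assumes locally built images tagged latest):

    # Initialise the database, then run the integration test container and its dependencies.
    export TAG=latest
    docker-compose -f tools/docker-compose.yml -f tools/ci.yml --project-directory . -p localtest up db_init
    docker-compose -f tools/docker-compose.yml -f tools/ci.yml --project-directory . -p localtest up integrationtests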