Merged
4 changes: 2 additions & 2 deletions .azurePipeline/k8s_test_job.yaml.tmpl
@@ -25,12 +25,12 @@ spec:
         claimName: $PVC
   containers:
     - name: entitytester
-      image: $IMAGE_NAME_WITH_TAG
+      image: $TEST_E2E_IMAGE_NAME_WITH_TAG
Contributor:
What does E2E stand for? End to end? From which end to which end?

Collaborator Author:
Yup, end to end. Here it means tests that only use the public REST API. We could expand further, or add another level to test the integration between client apps and the server.
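
To make that concrete, here is a minimal, hypothetical sketch of such an e2e test (illustrative only, not code from this PR). It talks to the service purely through the public REST API, relying on the `SERVER` environment variable and the `/api/v1/status` endpoint that the k8s test job template sets up:

```python
import os

import requests

# SERVER is injected by the k8s test job (env: SERVER=http://$SERVICE);
# the exact response body of /api/v1/status is an assumption here.
SERVER = os.environ['SERVER']


def test_status_endpoint_is_reachable():
    # Same public endpoint that dockerize waits on before the suite starts.
    response = requests.get(f'{SERVER}/api/v1/status')
    assert response.status_code == 200
```

A fuller suite would exercise project creation, uploads and results through the same public endpoints, as the fixtures moved out of entityservice/tests do.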

       imagePullPolicy: Always
       env:
         - name: SERVER
           value: http://$SERVICE
-      command: ["dockerize", "-wait", "http://$SERVICE/api/v1/status", "-timeout", "5m", "python", "-m", "pytest", "-n", "4", "entityservice/tests", "-x", "--junit-xml=/mnt/results.xml"]
+      command: ["dockerize", "-wait", "http://$SERVICE/api/v1/status", "-timeout", "5m", "python", "-m", "pytest", "-n", "4", "e2etests/tests", "-x", "--junit-xml=/mnt/results.xml"]
       volumeMounts:
         - mountPath: /mnt
           name: results
24 changes: 23 additions & 1 deletion azure-pipelines.yml
@@ -21,6 +21,7 @@

 variables:
   backendImageName: data61/anonlink-app
+  testE2EImageName: data61/anonlink-test
   frontendImageName: data61/anonlink-nginx
   tutorialImageName: data61/anonlink-docs-tutorials
   benchmarkingImageName: data61/anonlink-benchmark
@@ -89,6 +90,26 @@ stages:
       imageName: data61/anonlink-app
       dockerBuildVersion: "$[dependencies.HashBaseDependencies.outputs['SetDockerBaseTag.DOCKER_BASE_TAG']]"

+- stage: stage_docker_e2e_test_image_build
+  displayName: Build E2E Test Docker image
+  dependsOn: [stage_base_docker_image_build]
+  jobs:
+  # Why do we recompute the base hash? Because we can't pass variables between stages.
+  # https://github.com/microsoft/azure-pipelines-tasks/issues/4743
+  - job: HashBaseDependencies
+    displayName: Hash Dependencies
+    pool:
+      vmImage: 'ubuntu-latest'
+    steps:
+    - template: .azurePipeline/templateSetVariableDockerBaseTag.yml
+  - template: .azurePipeline/templateDockerBuildPush.yml
+    parameters:
+      folder: './e2etests'
+      jobName: 'anonlink_e2e_test'
+      dependsOn: HashBaseDependencies
+      imageName: data61/anonlink-test
+      dockerBuildVersion: "$[dependencies.HashBaseDependencies.outputs['SetDockerBaseTag.DOCKER_BASE_TAG']]"
+
 - stage: stage_docker_nginx_image_build
   displayName: Nginx Docker build
   dependsOn: []
@@ -219,6 +240,7 @@ stages:
echo "##vso[task.setvariable variable=PVC]$(DEPLOYMENT)-test-results"
echo "##vso[task.setvariable variable=SERVICE]$(DEPLOYMENT)-entity-service-server"
echo $(backendImageName):$(DOCKER_TAG) | xargs -I@ echo "##vso[task.setvariable variable=IMAGE_NAME_WITH_TAG]@"
echo $(testE2EImageName):$(DOCKER_TAG) | xargs -I@ echo "##vso[task.setvariable variable=TEST_E2E_IMAGE_NAME_WITH_TAG]@"
echo $(DEPLOYMENT)-tmppod | xargs -I@ echo "##vso[task.setvariable variable=POD_NAME]@"
displayName: 'Set variables for service, test result volume and pod'
- task: Kubernetes@1
@@ -304,7 +326,7 @@ stages:
             cat .azurePipeline/k8s_test_job.yaml.tmpl | \
               sed 's|\$PVC'"|$(PVC)|g" | \
               sed 's|\$DEPLOYMENT_NAME'"|$(DEPLOYMENT)|g" | \
-              sed 's|\$IMAGE_NAME_WITH_TAG'"|$(IMAGE_NAME_WITH_TAG)|g" | \
+              sed 's|\$TEST_E2E_IMAGE_NAME_WITH_TAG'"|$(TEST_E2E_IMAGE_NAME_WITH_TAG)|g" | \
               sed 's|\$SERVICE'"|$(SERVICE)|g" > $(Build.ArtifactStagingDirectory)/k8s_test_job.yaml
           displayName: 'Prepare integration test job from template'

25 changes: 0 additions & 25 deletions backend/entityservice/tests/config.py

This file was deleted.

183 changes: 0 additions & 183 deletions backend/entityservice/tests/conftest.py
@@ -1,183 +0,0 @@
import os
import time
import pytest
import requests as requests_library
import itertools

from entityservice.tests.util import create_project_upload_fake_data, delete_project, create_project_no_data

THROTTLE_SLEEP = 0.2


@pytest.fixture(scope='session')
def requests():
    """
    We inject the requests session.
    For now we just add a small sleep after every request to ensure we don't get throttled when
    tests run back to back. Note the rate limit in nginx is 10 requests per ip per second.
    """
    def delay_next(r, *args, **kwargs):
        time.sleep(THROTTLE_SLEEP)

    testing_session = requests_library.Session()
    testing_session.hooks['response'].append(delay_next)
    yield testing_session


# Parameterising on:
#
# - pairs of dataset sizes
# - overlap of the sizes
# - result_type for 2 parties in ['similarity_scores', 'permutations'] and for more parties in ['groups']
# - threshold

ENVVAR_NAME = 'ENTITY_SERVICE_RUN_SLOW_TESTS'
THRESHOLDS = [0.9, 1.0]
OVERLAPS = [0.0, 0.9]
ENCODING_SIZES = [8]
NUMBERS_PARTIES = [2, 3, 5]

if os.getenv(ENVVAR_NAME):
    ENCODING_SIZES.extend([64, 128, 512, 2048])
    OVERLAPS.extend([0.2, 0.5, 1.0])
    THRESHOLDS.extend([0.6, 0.8, 0.95])

FAST_SIZES_2P = tuple(itertools.product([1, 1000], repeat=2))
FAST_SIZES_NP = tuple(itertools.chain(
    FAST_SIZES_2P,
    [(1, 1000, 1000),
     (1000, 1, 1000),
     (1000, 1000, 1),
     (1000, 1000, 1000),
     (1000, 1000, 1000, 1000, 1000)]))

SLOW_SIZES_2P = tuple(itertools.combinations([1, 10000, 100000, 1000000], 2))
SLOW_SIZES_NP = tuple(itertools.chain(
    SLOW_SIZES_2P,
    itertools.product(
        [10000, 100000], [10000, 100000], [100000, 1000000]),
    ((10000, 10000, 100000, 100000, 1000000),)))

SIZES_2P = (tuple(itertools.chain(FAST_SIZES_2P, SLOW_SIZES_2P))
            if os.getenv(ENVVAR_NAME)
            else FAST_SIZES_2P)
SIZES_NP = (tuple(itertools.chain(FAST_SIZES_NP, SLOW_SIZES_NP))
            if os.getenv(ENVVAR_NAME)
            else FAST_SIZES_NP)

PROJECT_PARAMS_2P = tuple(
    itertools.product(SIZES_2P, OVERLAPS, ENCODING_SIZES))
PROJECT_PARAMS_NP = tuple(
    itertools.product(SIZES_NP, OVERLAPS, ENCODING_SIZES))
PROJECT_RESULT_TYPES_2P = ['similarity_scores', 'permutations']
PROJECT_RESULT_TYPES_NP = ['groups']


def create_project_response(requests, size, overlap, result_type, encoding_size=128):
    """
    Create a project with the given size, overlap and result_type.

    Tests that use one of these projects will get a dict like the following:

    {
        "project_id": "ID",
        "upload-mode": "BINARY" | "JSON",
        "size": [size 1, size 2],
        "encoding-size": int number of bytes in each encoding e.g. 128,
        "overlap": float between 0 and 1,
        "result_token": "TOKEN",
        "upload_tokens": [TOKENS, ...],
        "dp_1": <JSON RESPONSE TO DATA UPLOAD>
        "dp_2": <JSON RESPONSE TO DATA UPLOAD>
    }
    """
    project, dp_responses = create_project_upload_fake_data(
        requests, size, overlap=overlap, result_type=result_type, encoding_size=encoding_size)
    project.update({
        'size': size,
        'encoding-size': encoding_size,
        'upload-mode': 'JSON',
        'overlap': overlap,
        'dp_responses': dp_responses
    })
    return project


@pytest.fixture(scope='function', params=PROJECT_PARAMS_2P)
def similarity_scores_project(request, requests):
    size, overlap, encoding_size = request.param
    prj = create_project_response(requests, size, overlap, 'similarity_scores', encoding_size)
    yield prj
    delete_project(requests, prj)


@pytest.fixture(scope='function', params=tuple(itertools.chain(
    [(t, 2) for t in PROJECT_RESULT_TYPES_2P],
    [(t, n) for t in PROJECT_RESULT_TYPES_NP for n in NUMBERS_PARTIES])))
def result_type_number_parties(request):
    yield request.param


@pytest.fixture(params=(
    *[(t, n) for t in PROJECT_RESULT_TYPES_2P
      for n in (None, 2)],
    *[(t, n) for t in PROJECT_RESULT_TYPES_NP
      for n in (None, *NUMBERS_PARTIES)]))
def result_type_number_parties_or_none(request):
    yield request.param


@pytest.fixture
def valid_project_params(request, result_type_number_parties_or_none):
    result_type, number_parties_or_none = result_type_number_parties_or_none
    # None is what we use to test handling of default values
    params_dict = {'result_type': result_type}
    if number_parties_or_none is not None:
        params_dict['number_parties'] = number_parties_or_none
    return params_dict


@pytest.fixture(scope='function')
def project(request, requests, result_type_number_parties):
    result_type, number_parties = result_type_number_parties
    project = create_project_no_data(
        requests,
        result_type=result_type,
        number_parties=number_parties)
    yield project
    # Release project resource
    delete_project(requests, project)


@pytest.fixture(scope='function', params=ENCODING_SIZES)
def encoding_size(request):
    yield request.param


@pytest.fixture(scope='function', params=THRESHOLDS)
def threshold(request):
    yield request.param


@pytest.fixture(scope='function', params=PROJECT_PARAMS_2P)
def permutations_project(request, requests):
    size, overlap, encoding_size = request.param
    prj = create_project_response(requests, size, overlap, 'permutations', encoding_size)
    yield prj
    delete_project(requests, prj)


@pytest.fixture(scope='function', params=PROJECT_PARAMS_NP)
def groups_project(request, requests):
    size, overlap, encoding_size = request.param
    prj = create_project_response(requests, size, overlap, 'groups', encoding_size)
    yield prj
    delete_project(requests, prj)


@pytest.fixture(
    params=itertools.chain(
        itertools.product(PROJECT_RESULT_TYPES_2P, [1, 3, 4, 5]),
        [(t, 1) for t in PROJECT_RESULT_TYPES_NP]))
def invalid_result_type_number_parties(request):
    yield request.param
68 changes: 0 additions & 68 deletions backend/entityservice/tests/generate_test_data.py

This file was deleted.

27 changes: 11 additions & 16 deletions backend/entityservice/tests/test_serialization.py
@@ -35,21 +35,24 @@ def test_generate_scores_produces_json(self):
             (array('I', [1, 2, 5]), array('I', [2, 2, 5]))
         )

+        json_obj = self._serialize_and_load_scores(sims_iter)
+        assert len(json_obj["similarity_scores"]) == 3
+        for pair_and_score in json_obj["similarity_scores"]:
+            self.assertEqual(len(pair_and_score), 3)
+            a, b, score = pair_and_score
+            self.assertEqual(len(a), 2)
+            self.assertEqual(len(b), 2)
+
+    def _serialize_and_load_scores(self, sims_iter):
         buffer = io.BytesIO()
         anonlink.serialization.dump_candidate_pairs(sims_iter, buffer)
         buffer.seek(0)
         json_iterator = generate_scores(buffer)

         # Consume the whole iterator and ensure it is valid json
         json_str = ''.join(json_iterator)
         json_obj = json.loads(json_str)
         self.assertIn('similarity_scores', json_obj)
-        assert len(json_obj["similarity_scores"]) == 3
-        for pair_and_score in json_obj["similarity_scores"]:
-            self.assertEqual(len(pair_and_score), 3)
-            a, b, score = pair_and_score
-            self.assertEqual(len(a), 2)
-            self.assertEqual(len(b), 2)
+        return json_obj

     def test_sims_to_json_empty(self):
         sims_iter = (
@@ -58,15 +61,7 @@ def test_sims_to_json_empty(self):
             (array('I', []), array('I', []))
         )

-        buffer = io.BytesIO()
-        anonlink.serialization.dump_candidate_pairs(sims_iter, buffer)
-        buffer.seek(0)
-        json_iterator = generate_scores(buffer)
-
-        # Consume the whole iterator and ensure it is valid json
-        json_str = ''.join(json_iterator)
-        json_obj = json.loads(json_str)
-        self.assertIn('similarity_scores', json_obj)
+        json_obj = self._serialize_and_load_scores(sims_iter)
         assert len(json_obj["similarity_scores"]) == 0

     def test_binary_pack_filters(self):