diff --git a/.azurePipeline/runDockerComposeTests.sh b/.azurePipeline/runDockerComposeTests.sh index dbb50396..d3a4d3d3 100755 --- a/.azurePipeline/runDockerComposeTests.sh +++ b/.azurePipeline/runDockerComposeTests.sh @@ -12,7 +12,7 @@ Usage: $SCRIPT_NAME [parameters] Script run by the Azure Pipeline to start all the services required by the entity service with docker-compose and the test or benchmark container, copying the results in a chosen file. -The result is an xml file for the type 'tests' and 'tutorials', and a JSON file for the type 'benchmark'. +The result is an xml file for the type 'integrationtests' and 'tutorials', and a JSON file for the type 'benchmark'. -p Project name (used by docker-compose with '-p'). REQUIRED. -o Output file where to store the results. [$RESULT_FILE] @@ -60,9 +60,11 @@ commandPrefix="docker-compose -f tools/docker-compose.yml -f tools/ci.yml --proj if [[ "$NO_ANSI" == "TRUE" ]]; then commandPrefix="$commandPrefix --no-ansi " fi - -echo "Initialise the database" -$commandPrefix -p $PROJECT_NAME up db_init > /dev/null 2>&1 + +echo "Initialise the database and the object store" +$commandPrefix -p $PROJECT_NAME up objectstore_init db_init +echo "Initialisation complete" + if [[ $TYPE == "integrationtests" ]]; then CREATED_RESULT_FILE="/var/www/testResults.xml" @@ -75,7 +77,6 @@ else exit 1 fi -echo "Start type $TYPE" $commandPrefix -p $PROJECT_NAME up --abort-on-container-exit --exit-code-from $TYPE db minio redis backend worker nginx $TYPE exit_code=$? echo "Retrieve the $TYPE tests results." diff --git a/backend/entityservice/api_def/openapi.yaml b/backend/entityservice/api_def/openapi.yaml index c23cf816..13d5073a 100644 --- a/backend/entityservice/api_def/openapi.yaml +++ b/backend/entityservice/api_def/openapi.yaml @@ -8,13 +8,15 @@ # descriptions. 
openapi: 3.0.0 info: - version: '1.3' + version: '1.13' title: Entity Matching API x-logo: url: http://clkhash.readthedocs.io/en/latest/_static/logo.svg contact: name: 'Confidential Computing, Data61 | CSIRO' email: confidential-computing@csiro.au + url: https://github.com/data61/anonlink-entity-service + description: >- Allows multiple organisations to carry out private record linkage - without disclosing personally identifiable information. @@ -86,6 +88,8 @@ info: servers: - url: https://anonlink.easd.data61.xyz/api/v1 description: default EASD cluster +- url: http://localhost:8851/api/v1 + description: Local deployment paths: /status: @@ -276,6 +280,44 @@ paths: '503': $ref: '#/components/responses/RateLimited' + '/projects/{project_id}/authorize-external-upload': + get: + operationId: entityservice.views.objectstore.authorize_external_upload + summary: Retrieve temporary object store credentials for uploading data + tags: + - Project + description: | + Returns a set of temporary security credentials that the client can use to upload data to the + object store. + + A valid **upload token** is required to authorise this call. The returned *Temporary Object + Store Credentials* can be used with any S3 compatible client. For example by using `boto3` in + Python. The returned credentials are restricted to allow only uploading data to a particular path + in a particular bucket for a finite period (defaulting to 12 hours). + + Note this feature may be disabled by the administrator, in this case the endpoint will return a + `500` server error. 
+ parameters: + - $ref: '#/components/parameters/project_id' + - $ref: '#/components/parameters/token' + responses: + '200': + description: Temporary Object Store Credentials + content: + application/json: + schema: + $ref: '#/components/schemas/ObjectStoreCredentials' + '400': + $ref: '#/components/responses/BadRequest' + '403': + $ref: '#/components/responses/Unauthorized' + '404': + $ref: '#/components/responses/NotFound' + '500': + $ref: '#/components/responses/Error' + '503': + $ref: '#/components/responses/RateLimited' + '/projects/{project_id}/clks': post: operationId: entityservice.views.project.project_clks_post @@ -327,12 +369,6 @@ paths: oneOf: - $ref: '#/components/schemas/CLKUpload' - $ref: '#/components/schemas/CLKnBlockUpload' - # unfortunately connexion can not handle multiple different encoding types on an endpoint. - #application/octet-stream: - # schema: - # type: string - # format: binary - responses: '201': description: Data Uploaded @@ -833,7 +869,7 @@ components: minimum: 0 description: type: string - description: oportunity to give those numbers some context, what are we counting here? + description: opportunity to give those numbers some context, what are we counting here? relative: type: number format: double @@ -1076,3 +1112,37 @@ components: type: string message: type: string + + ObjectStoreCredentials: + description: Temporary credentials allowing client to upload a file to an object store. + type: object + properties: + upload: + description: | + Configuration of object store to upload file/s to. Specifies the server, bucket and + the approved path. The attached credentials are restricted to only allow uploads to + this path. + type: object + properties: + endpoint: + type: string + description: Hostname, and port of object store. E.g. 
minio.anonlink.example.com:9000 + bucket: + type: string + description: Target bucket + path: + type: string + description: Target path + + credentials: + description: Object Store credentials (compatible with both AWS & MinIO) + type: object + properties: + AccessKeyId: + type: string + SecretAccessKey: + type: string + Expiration: + type: string + SessionToken: + type: string diff --git a/backend/entityservice/database/selections.py b/backend/entityservice/database/selections.py index 0d8d9120..6c6c84d9 100644 --- a/backend/entityservice/database/selections.py +++ b/backend/entityservice/database/selections.py @@ -489,8 +489,8 @@ def get_all_objects_for_project(db, project_id): WHERE dp = %s """, [dp['id']], one=True) - if clk_file_ref is not None: - logger.info("blooming data file found: {}".format(clk_file_ref)) + if clk_file_ref is not None and clk_file_ref['file'] is not None: + logger.info("upload record found: {}".format(clk_file_ref)) object_store_files.append(clk_file_ref['file']) if result_type == "similarity_scores": diff --git a/backend/entityservice/integrationtests/objectstoretests/conftest.py b/backend/entityservice/integrationtests/objectstoretests/conftest.py new file mode 100644 index 00000000..5aa7df5c --- /dev/null +++ b/backend/entityservice/integrationtests/objectstoretests/conftest.py @@ -0,0 +1,22 @@ +import os +from time import sleep + +import minio +import pytest + +from entityservice.settings import Config as config + +@pytest.fixture(scope='session') +def upload_restricted_minio_client(): + + sleep(int(os.getenv('INITIAL_DELAY', '5'))) + + restricted_mc_client = minio.Minio( + config.UPLOAD_OBJECT_STORE_SERVER, + config.UPLOAD_OBJECT_STORE_ACCESS_KEY, + config.UPLOAD_OBJECT_STORE_SECRET_KEY, + region='us-east-1', + secure=False + ) + restricted_mc_client.set_app_info("anonlink-restricted", "testing client") + return restricted_mc_client diff --git a/backend/entityservice/integrationtests/objectstoretests/test_objectstore.py 
b/backend/entityservice/integrationtests/objectstoretests/test_objectstore.py new file mode 100644 index 00000000..533f744d --- /dev/null +++ b/backend/entityservice/integrationtests/objectstoretests/test_objectstore.py @@ -0,0 +1,70 @@ +""" +Testing: + - uploading over existing files + - using deleted credentials + - using expired credentials + +""" +import io + +import minio +from minio import Minio +import pytest +from minio.credentials import AssumeRoleProvider, Credentials + +from entityservice.object_store import connect_to_object_store, connect_to_upload_object_store +from entityservice.settings import Config + +restricted_upload_policy = """{ + "Version": "2012-10-17", + "Statement": [ + { + "Action": [ + "s3:PutObject" + ], + "Effect": "Allow", + "Resource": [ + "arn:aws:s3:::uploads/2020/*" + ], + "Sid": "Upload-access-to-specific-bucket-only" + } + ] +} +""" + + +class TestAssumeRole: + + def test_temp_credentials_minio(self): + + upload_endpoint = Config.UPLOAD_OBJECT_STORE_SERVER + bucket_name = "uploads" + + root_mc_client = connect_to_object_store() + upload_restricted_minio_client = connect_to_upload_object_store() + if not root_mc_client.bucket_exists(bucket_name): + root_mc_client.make_bucket(bucket_name) + + with pytest.raises(minio.error.AccessDenied): + upload_restricted_minio_client.list_buckets() + + # Should be able to put an object though + upload_restricted_minio_client.put_object(bucket_name, 'testobject', io.BytesIO(b'data'), length=4) + + credentials_provider = AssumeRoleProvider(upload_restricted_minio_client, + Policy=restricted_upload_policy + ) + temp_creds = Credentials(provider=credentials_provider) + + newly_restricted_mc_client = Minio(upload_endpoint, credentials=temp_creds, region='us-east-1', secure=False) + + with pytest.raises(minio.error.AccessDenied): + newly_restricted_mc_client.list_buckets() + + # Note this put object worked with the earlier credentials + # But should fail if we have applied the more restrictive policy 
+ with pytest.raises(minio.error.AccessDenied): + newly_restricted_mc_client.put_object(bucket_name, 'testobject2', io.BytesIO(b'data'), length=4) + + # this path is allowed in the policy however + newly_restricted_mc_client.put_object(bucket_name, '2020/testobject', io.BytesIO(b'data'), length=4) diff --git a/backend/entityservice/object_store.py b/backend/entityservice/object_store.py index 8a0b4602..bf754e05 100644 --- a/backend/entityservice/object_store.py +++ b/backend/entityservice/object_store.py @@ -14,10 +14,34 @@ def connect_to_object_store(): secure=False ) logger.debug("Connected to minio") - if not mc.bucket_exists(config.MINIO_BUCKET): - logger.info("Creating bucket {}".format(config.MINIO_BUCKET)) + mc.set_app_info("anonlink-client", "minio general client") + create_bucket(mc, config.MINIO_BUCKET) + return mc + + +def connect_to_upload_object_store(): + """ + Instantiate a minio client with an upload only policy applied. + + :return: + """ + mc = minio.Minio( + config.UPLOAD_OBJECT_STORE_SERVER, + config.UPLOAD_OBJECT_STORE_ACCESS_KEY, + config.UPLOAD_OBJECT_STORE_SECRET_KEY, + region="us-east-1", + secure=False + ) + mc.set_app_info("anonlink-upload", "minio client for uploads") + logger.debug("Connected to minio upload account") + + return mc + + +def create_bucket(minio_client, bucket): + if not minio_client.bucket_exists(bucket): + logger.info("Creating bucket {}".format(bucket)) try: - mc.make_bucket(config.MINIO_BUCKET) + minio_client.make_bucket(bucket) except minio.error.BucketAlreadyOwnedByYou: - logger.info("The bucket {} was already created.".format(config.MINIO_BUCKET)) - return mc + logger.info("The bucket {} was already created.".format(bucket)) diff --git a/backend/entityservice/settings.py b/backend/entityservice/settings.py index 14b73531..6e0f412e 100644 --- a/backend/entityservice/settings.py +++ b/backend/entityservice/settings.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python3.4 """ Config shared between the application backend and the 
celery workers. """ @@ -31,6 +30,13 @@ class Config(object): MINIO_SECRET_KEY = os.getenv('MINIO_SECRET_KEY', '') MINIO_BUCKET = os.getenv('MINIO_BUCKET', 'entityservice') + UPLOAD_OBJECT_STORE_ENABLED = os.getenv('UPLOAD_OBJECT_STORE_ENABLED', 'true').lower() == "true" + UPLOAD_OBJECT_STORE_STS_DURATION = int(os.getenv('UPLOAD_OBJECT_STORE_STS_DURATION', '43200')) + UPLOAD_OBJECT_STORE_SERVER = os.getenv('UPLOAD_OBJECT_STORE_SERVER', MINIO_SERVER) + UPLOAD_OBJECT_STORE_ACCESS_KEY = os.getenv('UPLOAD_OBJECT_STORE_ACCESS_KEY', '') + UPLOAD_OBJECT_STORE_SECRET_KEY = os.getenv('UPLOAD_OBJECT_STORE_SECRET_KEY', '') + UPLOAD_OBJECT_STORE_BUCKET = os.getenv('UPLOAD_OBJECT_STORE_BUCKET', 'anonlink-uploads') + DATABASE_SERVER = os.getenv('DATABASE_SERVER', 'db') DATABASE = os.getenv('DATABASE', 'postgres') DATABASE_USER = os.getenv('DATABASE_USER', 'postgres') diff --git a/backend/entityservice/views/objectstore.py b/backend/entityservice/views/objectstore.py new file mode 100644 index 00000000..c9055046 --- /dev/null +++ b/backend/entityservice/views/objectstore.py @@ -0,0 +1,83 @@ +import json + +import opentracing +from flask import request +from minio.credentials import AssumeRoleProvider, Credentials + +from entityservice.settings import Config as config +import entityservice.database as db +from entityservice.object_store import connect_to_upload_object_store +from entityservice.utils import safe_fail_request +from entityservice.views import bind_log_and_span, precheck_upload_token +from entityservice.views.serialization import ObjectStoreCredentials + + +def _get_upload_policy(bucket_name="uploads", path="*"): + + restricted_upload_policy = { + "Version": "2012-10-17", + "Statement": [ + { + "Action": [ + "s3:PutObject" + ], + "Effect": "Allow", + "Resource": [ + "arn:aws:s3:::{}/{}".format(bucket_name, path), + "arn:aws:s3:::{}/{}/*".format(bucket_name, path), + ], + "Sid": "Upload-access-to-specific-bucket-only" + } + ] + } + + return 
json.dumps(restricted_upload_policy) + + +def authorize_external_upload(project_id): + if not config.UPLOAD_OBJECT_STORE_ENABLED: + safe_fail_request(500, + message="Retrieving temporary object store credentials feature disabled", + title="Feature Disabled") + + headers = request.headers + + log, parent_span = bind_log_and_span(project_id) + + log.debug("Authorizing external upload") + token = precheck_upload_token(project_id, headers, parent_span) + log.debug(f"Update token is valid") + with db.DBConn() as conn: + dp_id = db.get_dataprovider_id(conn, token) + log = log.bind(dpid=dp_id) + + with opentracing.tracer.start_span('assume-role-request', child_of=parent_span): + client = connect_to_upload_object_store() + client.set_app_info("anonlink", "development version") + + bucket_name = config.UPLOAD_OBJECT_STORE_BUCKET + path = f"{project_id}/{dp_id}" + log.info(f"Retrieving temporary object store credentials for path: '{bucket_name}/{path}'") + + credentials_provider = AssumeRoleProvider(client, + Policy=_get_upload_policy(bucket_name, path=path), + DurationSeconds=config.UPLOAD_OBJECT_STORE_STS_DURATION) + credential_values = Credentials(provider=credentials_provider).get() + expiry = credentials_provider._expiry._expiration + + log.info("Retrieved temporary credentials") + + credentials_json = ObjectStoreCredentials().dump(credential_values) + log.debug("Temp credentials", **credentials_json) + + # Convert datetime to ISO 8601 string + credentials_json["Expiration"] = expiry.strftime('%Y-%m-%dT%H:%M:%S.%f%z') + + return { + "credentials": credentials_json, + "upload": { + "endpoint": config.UPLOAD_OBJECT_STORE_SERVER, + "bucket": bucket_name, + "path": f"{project_id}/{dp_id}" + } + } diff --git a/backend/entityservice/views/project.py b/backend/entityservice/views/project.py index d7d4b656..0f38eb81 100644 --- a/backend/entityservice/views/project.py +++ b/backend/entityservice/views/project.py @@ -108,7 +108,7 @@ def project_binaryclks_post(project_id): """ 
log, parent_span = bind_log_and_span(project_id) headers = request.headers - token = precheck_encoding_upload(project_id, headers, parent_span) + token = precheck_upload_token(project_id, headers, parent_span) with DBConn() as conn: dp_id = db.get_dataprovider_id(conn, token) @@ -176,7 +176,7 @@ def encoding_iterator(filter_stream): return {'message': 'Updated', 'receipt_token': receipt_token}, 201 -def precheck_encoding_upload(project_id, headers, parent_span): +def precheck_upload_token(project_id, headers, parent_span): """ Raise a `ProblemException` if the project doesn't exist or the authentication token passed in the headers isn't valid. @@ -202,7 +202,7 @@ def project_clks_post(project_id): log, parent_span = bind_log_and_span(project_id) - token = precheck_encoding_upload(project_id, headers, parent_span) + token = precheck_upload_token(project_id, headers, parent_span) with DBConn() as conn: dp_id = db.get_dataprovider_id(conn, token) diff --git a/backend/entityservice/views/serialization.py b/backend/entityservice/views/serialization.py index 67b63fc8..81d678e6 100644 --- a/backend/entityservice/views/serialization.py +++ b/backend/entityservice/views/serialization.py @@ -80,3 +80,11 @@ class running(RunStatus): class error(RunStatus): message = fields.String(required=True) detail = fields.String() + + +class ObjectStoreCredentials(Schema): + access_key = fields.String(data_key="AccessKeyId") + secret_key = fields.String(data_key="SecretAccessKey") + session_token = fields.String(data_key="SessionToken") + # Note expiry is from a separate object + #expiry = fields.String(data_key="Expiration") diff --git a/base/requirements.txt b/base/requirements.txt index 28d3cf78..b5720ea3 100644 --- a/base/requirements.txt +++ b/base/requirements.txt @@ -13,7 +13,7 @@ ijson==2.6.1 iso8601==0.1.12 jaeger-client==4.0.0 marshmallow==3.0.0b10 -minio==5.0.7 +minio==5.0.10 opentracing==2.3.0 opentracing_instrumentation==2.4.3 psycopg2==2.8.4 @@ -22,7 +22,7 @@ 
pytest-xdist==1.29.0 PyYAML==5.3 redis==3.4.1 requests==2.23.0 -setproctitle==1.1.10 # used by celery to change process nameFlaskTracing +setproctitle==1.1.10 # used by celery to change process name structlog==20.1.0 tenacity==5.1.1 tornado==4.5.3 diff --git a/deployment/entity-service/requirements.yaml b/deployment/entity-service/requirements.yaml index 29d86bb5..5293b24c 100644 --- a/deployment/entity-service/requirements.yaml +++ b/deployment/entity-service/requirements.yaml @@ -4,10 +4,10 @@ dependencies: repository: https://kubernetes-charts.storage.googleapis.com condition: provision.redis - name: minio - version: 5.0.19 + version: 5.0.22 repository: https://kubernetes-charts.storage.googleapis.com condition: provision.minio - name: postgresql version: 8.3.0 repository: https://kubernetes-charts.storage.googleapis.com - condition: provision.postgresql \ No newline at end of file + condition: provision.postgresql diff --git a/deployment/entity-service/templates/api-deployment.yaml b/deployment/entity-service/templates/api-deployment.yaml index 0a5f7d42..e8a93d41 100644 --- a/deployment/entity-service/templates/api-deployment.yaml +++ b/deployment/entity-service/templates/api-deployment.yaml @@ -82,6 +82,16 @@ spec: secretKeyRef: name: {{ template "es.fullname" . }} key: minioSecretKey + - name: UPLOAD_OBJECT_STORE_ACCESS_KEY + valueFrom: + secretKeyRef: + name: {{ template "es.fullname" . }} + key: minioUploadAccessKey + - name: UPLOAD_OBJECT_STORE_SECRET_KEY + valueFrom: + secretKeyRef: + name: {{ template "es.fullname" . 
}} + key: minioUploadSecretKey ports: - containerPort: 8000 name: entity-flask diff --git a/deployment/entity-service/templates/configmap.yaml b/deployment/entity-service/templates/configmap.yaml index 7a0c4306..8b1723f7 100644 --- a/deployment/entity-service/templates/configmap.yaml +++ b/deployment/entity-service/templates/configmap.yaml @@ -31,6 +31,9 @@ data: # MINIO_SECRET_KEY provided as a secret MINIO_BUCKET: {{ required "minio.defaultBucket.name is required." .Values.minio.defaultBucket.name | quote }} + UPLOAD_OBJECT_STORE_ENABLED: {{ .Values.anonlink.objectstore.uploadEnabled | quote }} + UPLOAD_OBJECT_STORE_BUCKET: {{ required "anonlink.objectstore.uploadBucket.name is required." .Values.anonlink.objectstore.uploadBucket.name | quote }} + {{ if .Values.provision.postgresql }} DATABASE_SERVER: {{ .Release.Name }}-{{ required "postgresql.nameOverride is required." .Values.postgresql.nameOverride }} {{ else }} diff --git a/deployment/entity-service/templates/es-credentials.yaml b/deployment/entity-service/templates/es-credentials.yaml index dedd6eaa..8f62e274 100644 --- a/deployment/entity-service/templates/es-credentials.yaml +++ b/deployment/entity-service/templates/es-credentials.yaml @@ -9,3 +9,5 @@ data: redisPassword: {{ required "redis.password must be provided." .Values.redis.password | b64enc | quote }} minioAccessKey: {{ required "minio.accessKey must be provided." .Values.minio.accessKey | b64enc | quote }} minioSecretKey: {{ required "minio.secretKey must be provided." .Values.minio.secretKey | b64enc | quote }} + minioUploadAccessKey: {{ required "anonlink.objectstore.uploadAccessKey must be provided." .Values.anonlink.objectstore.uploadAccessKey | b64enc | quote }} + minioUploadSecretKey: {{ required "anonlink.objectstore.uploadSecretKey must be provided." 
.Values.anonlink.objectstore.uploadSecretKey | b64enc | quote }} diff --git a/deployment/entity-service/templates/init-objectstore-job.yaml b/deployment/entity-service/templates/init-objectstore-job.yaml new file mode 100644 index 00000000..66b3be08 --- /dev/null +++ b/deployment/entity-service/templates/init-objectstore-job.yaml @@ -0,0 +1,61 @@ +{{- if .Values.api.objectstoreinit.enabled }} +apiVersion: batch/v1 +kind: Job +metadata: + name: {{ template "es.fullname" . }}-init-objectstore + labels: + {{- include "es.release_labels" . | indent 4 }} + tier: aux + annotations: + "helm.sh/hook": post-install + "helm.sh/hook-delete-policy": hook-succeeded, hook-failed +spec: + template: + metadata: + name: {{ template "name" . }}-init-objectstore + labels: + {{- include "es.release_labels" . | indent 8 }} + spec: + containers: + - name: objectstore-init + image: {{ required "api.objectstoreinit.image.repository must be provided." .Values.api.objectstoreinit.image.repository }}:{{ required "api.objectstoreinit.image.tag must be provided." .Values.api.objectstoreinit.image.tag }} + env: + - name: MINIO_SERVER + valueFrom: + configMapKeyRef: + key: MINIO_SERVER + name: {{ template "es.fullname" . }} + - name: MINIO_ACCESS_KEY + valueFrom: + secretKeyRef: + name: {{ template "es.fullname" . }} + key: minioAccessKey + - name: MINIO_SECRET_KEY + valueFrom: + secretKeyRef: + name: {{ template "es.fullname" . }} + key: minioSecretKey + - name: UPLOAD_ONLY_ACCESS_KEY + valueFrom: + secretKeyRef: + name: {{ template "es.fullname" . }} + key: minioUploadAccessKey + - name: UPLOAD_ONLY_SECRET_ACCESS_KEY + valueFrom: + secretKeyRef: + name: {{ template "es.fullname" . 
}} + key: minioUploadSecretKey + - name: UPLOAD_BUCKET + value: {{default "uploads" .Values.anonlink.objectstore.uploadBucket.name | quote }} + command: + - "/bin/sh" + - "/opt/init-objectstore.sh" + volumeMounts: + - name: script-volume + mountPath: /opt + volumes: + - name: script-volume + configMap: + name: {{ template "es.fullname" . }}-init-objectstore-config + restartPolicy: Never +{{- end }} diff --git a/deployment/entity-service/templates/initial-objectstore-config.yaml b/deployment/entity-service/templates/initial-objectstore-config.yaml new file mode 100644 index 00000000..3807eaa8 --- /dev/null +++ b/deployment/entity-service/templates/initial-objectstore-config.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ template "es.fullname" . }}-init-objectstore-config + labels: + {{- include "es.release_labels" . | indent 4 }} +data: + init-objectstore.sh: | + #!/bin/sh + export MC_HOST_minio=http://$MINIO_ACCESS_KEY:$MINIO_SECRET_KEY@$MINIO_SERVER + echo $MC_HOST_minio + mc --version + mc mb minio/$UPLOAD_BUCKET + mc admin user add minio $UPLOAD_ONLY_ACCESS_KEY $UPLOAD_ONLY_SECRET_ACCESS_KEY + mc admin policy set minio writeonly user=$UPLOAD_ONLY_ACCESS_KEY diff --git a/deployment/entity-service/values.yaml b/deployment/entity-service/values.yaml index fec7ef2e..ac2f3848 100644 --- a/deployment/entity-service/values.yaml +++ b/deployment/entity-service/values.yaml @@ -8,6 +8,15 @@ anonlink: ## Set arbitrary environment variables for the API and Workers. config: {} + objectstore: + uploadEnabled: "true" + # TODO can't leave defaults once minio exposed + uploadAccessKey: "EXAMPLE_UPLOAD_KEY" + uploadSecretKey: "EXAMPLE_UPLOAD_SECRET" + + uploadBucket: + name: "uploads" + api: @@ -89,6 +98,14 @@ api: cpu: 250m memory: 256Mi + ## A job that creates an upload only object store user. 
+ objectstoreinit: + enabled: "true" + + image: + repository: minio/mc + tag: RELEASE.2020-01-13T22-49-03Z + ingress: ## By default, we do not want the service to be accessible outside of the cluster. enabled: false @@ -330,6 +347,7 @@ minio: ## https://github.com/helm/charts/blob/master/stable/minio/values.yaml ## Default access credentials for the object store + # TODO remove defaults once minio exposed to interwebs accessKey: "exampleMinioAccessKey" secretKey: "exampleMinioSecretKet" diff --git a/docs/designs/anonlink-upload-data-plan.md b/docs/designs/anonlink-upload-data-plan.md index e4a9d2c4..1907d207 100644 --- a/docs/designs/anonlink-upload-data-plan.md +++ b/docs/designs/anonlink-upload-data-plan.md @@ -222,6 +222,8 @@ An example policy which restricts holders to upload only into a project and data } ``` +This endpoint may fail if the object store does not support creating temporary credentials. +This feature may be entirely disabled in the server configuration, see `settings.py` and `values.yaml`. A possible future extension is to take advantage of MinIO's [Security Token Service (STS)](https://docs.min.io/docs/minio-sts-quickstart-guide.html) to enable clients to request temporary credentials using their existing identity management system. @@ -261,6 +263,8 @@ aws --profile restricted --endpoint-url http://localhost:9000 sts assume-role -- --role-arn arn:xxx:xxx:xxx:xxxx --role-session-name anything ``` +Alternatively use minio or boto3 to retrieve credentials. + The backend will use the minio-py client library to retrieve the temporary credentials. diff --git a/docs/development.rst b/docs/development.rst index 2498b966..937b67dc 100644 --- a/docs/development.rst +++ b/docs/development.rst @@ -102,6 +102,19 @@ The run info ``HASH`` stores: ``backend/entityservice/cache/active_runs.py`` for implementation. 
+Object Store +------------ + +MinIO is an S3 compatible object store which is used to handle uploads and store +files for the Anonlink Entity Service. + + + + + +Deployment Testing +------------------ + Testing Local Deployment ~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/docs/local-deployment.rst b/docs/local-deployment.rst index 225e511a..7f5ed400 100644 --- a/docs/local-deployment.rst +++ b/docs/local-deployment.rst @@ -21,28 +21,30 @@ Run Run docker compose:: - docker-compose -p n1es -f tools/docker-compose.yml up + docker-compose -p anonlink -f tools/docker-compose.yml up This will start the following containers: -- nginx frontend (named ``n1es_nginx_1``) -- gunicorn/flask backend (named ``n1es_backend_1``) -- celery backend worker (named ``n1es_worker_1``) -- postgres database (named ``n1es_db_1``) -- redis job queue (named ``n1es_redis_1``) +- nginx frontend +- gunicorn/flask backend +- celery backend worker +- postgres database +- redis job queue - minio object store - jaeger opentracing +A temporary container that initializes the database will also be created and soon exit. + The REST api for the service is exposed on port ``8851`` of the nginx container, which docker will map to a high numbered port on your host. -The address of the nginx endpoint can be found with:: +The address of the REST API endpoint can be found with:: - docker port n1es_nginx_1 "8851" + docker-compose -p anonlink -f tools/docker-compose.yml port nginx 8851 For example to `GET` the service status:: - $ export ENTITY_SERVICE=`docker port n1es_nginx_1 "8851"` + $ export ENTITY_SERVICE=`docker-compose -p anonlink -f tools/docker-compose.yml port nginx 8851` $ curl $ENTITY_SERVICE/api/v1/status { "status": "ok", @@ -55,7 +57,7 @@ the DB volumes, which will persist and conflict with the next call to `docker-compose ... up` unless they are removed. 
Removing these volumes is easy, just run:: - docker-compose -p n1es -f tools/docker-compose.yml down -v + docker-compose -p anonlink -f tools/docker-compose.yml down -v in between calls to `docker-compose ... up`. diff --git a/docs/production-deployment.rst b/docs/production-deployment.rst index b13a4c81..6a87d6b7 100644 --- a/docs/production-deployment.rst +++ b/docs/production-deployment.rst @@ -95,6 +95,7 @@ the credentials: * ``global.postgresql.postgresqlPassword`` * ``redis.password`` (and ``redis-ha.redisPassword`` if provisioning redis) * ``minio.accessKey`` and ``minio.secretKey`` +* ``anonlink.objectstore.uploadAccessKey`` and ``anonlink.objectstore.uploadSecretKey`` Configuration of the celery workers diff --git a/e2etests/Dockerfile b/e2etests/Dockerfile index 26f3651a..e17237fc 100644 --- a/e2etests/Dockerfile +++ b/e2etests/Dockerfile @@ -8,5 +8,6 @@ RUN python -c "import anonlink; print('anonlink version:', anonlink.__version__) python -c "import clkhash; print('clkhash version:', clkhash.__version__)" ENV SERVER http://nginx:8851 -ENV INITIAL_DELAY 20 -CMD python -m pytest -n 2 e2etests/tests --junitxml=testResults.xml -x + +CMD dockerize -wait tcp://db:5432 -wait tcp://nginx:8851/api/v1/status -timeout 5m \ + /bin/sh -c "sleep 5 && python -m pytest -n 2 e2etests/tests --junitxml=testResults.xml -x" diff --git a/e2etests/tests/conftest.py b/e2etests/tests/conftest.py index 50d20208..321ac74f 100644 --- a/e2etests/tests/conftest.py +++ b/e2etests/tests/conftest.py @@ -140,6 +140,16 @@ def valid_project_params(request, result_type_number_parties_or_none): params_dict['number_parties'] = number_parties_or_none return params_dict +@pytest.fixture(scope='function') +def a_project(request, requests): + project = create_project_no_data( + requests, + result_type="groups", + number_parties=2) + yield project + # Release project resource + delete_project(requests, project) + @pytest.fixture(scope='function') def project(request, requests, 
result_type_number_parties): @@ -185,3 +195,7 @@ def groups_project(request, requests): [(t, 1) for t in PROJECT_RESULT_TYPES_NP])) def invalid_result_type_number_parties(request): yield request.param + +@pytest.fixture +def binary_test_file_path(request): + return os.path.join(os.path.dirname(os.path.realpath(__file__)), 'testdata/clks_128B_1k.bin') \ No newline at end of file diff --git a/e2etests/tests/test_project_uploads.py b/e2etests/tests/test_project_uploads.py index 7edfb5c9..4d2b83a9 100644 --- a/e2etests/tests/test_project_uploads.py +++ b/e2etests/tests/test_project_uploads.py @@ -1,6 +1,7 @@ import time import os import pytest +from minio import Minio from e2etests.config import url from e2etests.util import ( @@ -23,12 +24,45 @@ def test_project_single_party_data_uploaded(requests, valid_project_params): 'clks': generate_json_serialized_clks(100) } ) - assert r.status_code == 201 + assert r.status_code == 201, r.text upload_response = r.json() assert 'receipt_token' in upload_response -def test_project_binary_data_uploaded(requests, valid_project_params): +def test_project_external_data_uploaded(requests, valid_project_params, binary_test_file_path): + new_project_data = requests.post(url + 'projects', + json={ + 'schema': {}, + **valid_project_params + }).json() + r = requests.get( + url + 'projects/{}/authorize-external-upload'.format(new_project_data['project_id']), + headers={'Authorization': new_project_data['update_tokens'][0]}, + ) + assert r.status_code == 200 + upload_response = r.json() + + credentials = upload_response['credentials'] + upload_info = upload_response['upload'] + + # Use Minio python client to upload data + mc = Minio( + upload_info['endpoint'], + access_key=credentials['AccessKeyId'], + secret_key=credentials['SecretAccessKey'], + session_token=credentials['SessionToken'], + region='us-east-1', + secure=False + ) + + + etag = mc.fput_object(upload_info['bucket'], upload_info['path'] + "/test", binary_test_file_path) + + # 
Later - once the upload endpoint is complete notify the server + # of the uploaded data + + +def test_project_binary_data_uploaded(requests, valid_project_params, binary_test_file_path): new_project_data = requests.post(url + '/projects', json={ 'schema': {}, @@ -38,12 +72,10 @@ def test_project_binary_data_uploaded(requests, valid_project_params): expected_number_parties = get_expected_number_parties(valid_project_params) assert len(update_tokens) == expected_number_parties - small_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'testdata/clks_128B_1k.bin') - for token in update_tokens: upload_binary_data_from_file( requests, - small_file_path, new_project_data['project_id'], token, 1000) + binary_test_file_path, new_project_data['project_id'], token, 1000) run_id = post_run(requests, new_project_data, 0.99) result = get_run_result(requests, new_project_data, run_id, wait=True) diff --git a/e2etests/tests/test_uploads.py b/e2etests/tests/test_uploads.py new file mode 100644 index 00000000..57a1221e --- /dev/null +++ b/e2etests/tests/test_uploads.py @@ -0,0 +1,68 @@ +import io + +import minio +import pytest + +from e2etests.config import url + + +class TestAuthorizeExternalUpload: + + def test_get_auth_credentials(self, requests, a_project): + + for dp_index in range(2): + pid = a_project['project_id'] + res = requests.get(url + f"projects/{pid}/authorize-external-upload", + headers={'Authorization': a_project['update_tokens'][dp_index]}) + + assert res.status_code == 200 + raw_json = res.json() + assert "credentials" in raw_json + credentials = raw_json['credentials'] + assert "upload" in raw_json + + minio_endpoint = raw_json['upload']['endpoint'] + bucket_name = raw_json['upload']['bucket'] + allowed_path = raw_json['upload']['path'] + + for key in ['AccessKeyId', 'SecretAccessKey', 'SessionToken', 'Expiration']: + assert key in credentials + + # Test we can create and use these credentials via a Minio client + restricted_mc_client = 
minio.Minio( + minio_endpoint, + credentials['AccessKeyId'], + credentials['SecretAccessKey'], + credentials['SessionToken'], + region='us-east-1', + secure=False + ) + + # Client shouldn't be able to list buckets + with pytest.raises(minio.error.AccessDenied): + restricted_mc_client.list_buckets() + + with pytest.raises(minio.error.AccessDenied): + restricted_mc_client.put_object(bucket_name, 'testobject', io.BytesIO(b'data'), length=4) + + # Should be able to put an object in the approved path + restricted_mc_client.put_object(bucket_name, allowed_path + '/blocks.json', io.BytesIO(b'data'), length=4) + # Permission exists to upload multiple files in the approved path + restricted_mc_client.put_object(bucket_name, allowed_path + '/encodings.bin', io.BytesIO(b'data'), length=4) + + # Client shouldn't be allowed to download files + with pytest.raises(minio.error.AccessDenied): + restricted_mc_client.get_object(bucket_name, allowed_path + '/blocks.json') + + # Client shouldn't be allowed to delete uploaded files: + with pytest.raises(minio.error.AccessDenied): + restricted_mc_client.remove_object(bucket_name, allowed_path + '/blocks.json') + + # Client shouldn't be able to list objects in the bucket + with pytest.raises(minio.error.AccessDenied): + list(restricted_mc_client.list_objects(bucket_name)) + + # client shouldn't be able to list objects even in the approved path + with pytest.raises(minio.error.AccessDenied): + list(restricted_mc_client.list_objects(bucket_name, prefix=allowed_path)) + diff --git a/tools/ci.yml b/tools/ci.yml index e1d64958..6e397d3a 100644 --- a/tools/ci.yml +++ b/tools/ci.yml @@ -5,8 +5,8 @@ services: image: data61/anonlink-test:${TAG:-latest} environment: - SERVER=http://nginx:8851 - - INITIAL_DELAY=20 - JAEGER_AGENT_HOST=jaeger + - INITIAL_DELAY=5 depends_on: - backend - worker @@ -16,14 +16,20 @@ services: image: data61/anonlink-app:${TAG:-latest} environment: - SERVER=http://nginx:8851 - - INITIAL_DELAY=20 - 
DATABASE_PASSWORD=rX%QpV7Xgyrz - command: /bin/sh -c "dockerize -wait tcp://db:5432 -wait tcp://nginx:8851/api/v1/status -timeout 1m python -m pytest -n 1 entityservice/integrationtests --junitxml=testResults.xml -x" + - MINIO_ACCESS_KEY=AKIAIOSFODNN7EXAMPLE + - MINIO_SECRET_KEY=wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY + - UPLOAD_OBJECT_STORE_ACCESS_KEY=EXAMPLE_UPLOAD_ACCESS_KEY + - UPLOAD_OBJECT_STORE_SECRET_KEY=EXAMPLE_UPLOAD_SECRET_ACCESS_KEY + - INITIAL_DELAY=5 + command: dockerize -wait tcp://db:5432 -wait tcp://nginx:8851/api/v1/status -timeout 5m + /bin/sh -c "sleep 5 && python -m pytest -n 1 entityservice/integrationtests --junitxml=testResults.xml -x" depends_on: - db - backend - worker - nginx + - objectstore_init benchmark: image: data61/anonlink-benchmark:${TAG:-latest} diff --git a/tools/docker-compose.yml b/tools/docker-compose.yml index fcab038b..00e8147b 100644 --- a/tools/docker-compose.yml +++ b/tools/docker-compose.yml @@ -16,14 +16,20 @@ services: retries: 5 minio: - image: minio/minio:RELEASE.2020-04-04T05-39-31Z + image: minio/minio:RELEASE.2020-04-15T19-42-18Z command: server /export environment: - MINIO_ACCESS_KEY=AKIAIOSFODNN7EXAMPLE - MINIO_SECRET_KEY=wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY volumes: - minio:/export - + ports: + - 9000:9000 + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"] + interval: 30s + timeout: 20s + retries: 3 redis: image: redis:5.0 @@ -37,6 +43,9 @@ services: - DATABASE_PASSWORD=rX%QpV7Xgyrz - MINIO_ACCESS_KEY=AKIAIOSFODNN7EXAMPLE - MINIO_SECRET_KEY=wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY + - UPLOAD_OBJECT_STORE_BUCKET=uploads + - UPLOAD_OBJECT_STORE_ACCESS_KEY=EXAMPLE_UPLOAD_ACCESS_KEY + - UPLOAD_OBJECT_STORE_SECRET_KEY=EXAMPLE_UPLOAD_SECRET_ACCESS_KEY - FLASK_DB_MIN_CONNECTIONS=1 - FLASK_DB_MAX_CONNECTIONS=10 - JAEGER_AGENT_HOST=jaeger @@ -45,6 +54,7 @@ services: - db_init - redis - minio + - objectstore_init # The application server can also setup the database db_init: @@ 
-57,6 +67,28 @@ services: depends_on: - db + # Set up the object store to have another more restricted user + objectstore_init: + image: minio/mc:RELEASE.2020-04-19T19-17-53Z + environment: + # Provide root credentials to MINIO to set up more restricted service accounts + # MC_HOST_alias is equivalent to manually configuring a minio host + # mc config host add minio http://minio:9000 + #- MC_HOST_minio=http://AKIAIOSFODNN7EXAMPLE:wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY@minio:9000 + - MINIO_SERVER=minio:9000 + - MINIO_ACCESS_KEY=AKIAIOSFODNN7EXAMPLE + - MINIO_SECRET_KEY=wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY + # Account which will have upload-only object store access. + - UPLOAD_ONLY_ACCESS_KEY=EXAMPLE_UPLOAD_ACCESS_KEY + - UPLOAD_ONLY_SECRET_ACCESS_KEY=EXAMPLE_UPLOAD_SECRET_ACCESS_KEY + - UPLOAD_BUCKET=uploads + entrypoint: | + /bin/sh /opt/init-object-store.sh + volumes: + - ./tools/init-object-store.sh:/opt/init-object-store.sh:ro + depends_on: + - minio + # A celery worker worker: image: data61/anonlink-app:${TAG:-latest} diff --git a/tools/init-object-store.sh b/tools/init-object-store.sh new file mode 100755 index 00000000..945e72bf --- /dev/null +++ b/tools/init-object-store.sh @@ -0,0 +1,8 @@ +#!/bin/sh +mc --version +echo "== Initialising Object Store ==" +export MC_HOST_minio=http://$MINIO_ACCESS_KEY:$MINIO_SECRET_KEY@$MINIO_SERVER +mc mb minio/$UPLOAD_BUCKET +mc admin user add minio $UPLOAD_ONLY_ACCESS_KEY $UPLOAD_ONLY_SECRET_ACCESS_KEY +mc admin policy set minio writeonly user=$UPLOAD_ONLY_ACCESS_KEY +echo "== Object Store Initialised =="