Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 6 additions & 5 deletions .azurePipeline/runDockerComposeTests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ Usage: $SCRIPT_NAME [parameters]

Script run by the Azure Pipeline to start all the services required by the entity service with
docker-compose and the test or benchmark container, copying the results in a chosen file.
The result is an xml file for the type 'tests' and 'tutorials', and a JSON file for the type 'benchmark'.
The result is an xml file for the type 'integrationtests' and 'tutorials', and a JSON file for the type 'benchmark'.

-p Project name (used by docker-compose with '-p'). REQUIRED.
-o Output file where to store the results. [$RESULT_FILE]
Expand Down Expand Up @@ -60,9 +60,11 @@ commandPrefix="docker-compose -f tools/docker-compose.yml -f tools/ci.yml --proj
if [[ "$NO_ANSI" == "TRUE" ]]; then
commandPrefix="$commandPrefix --no-ansi "
fi

echo "Initialise the database"
$commandPrefix -p $PROJECT_NAME up db_init > /dev/null 2>&1

echo "Initialise the database and the object store"
$commandPrefix -p $PROJECT_NAME up objectstore_init db_init
echo "Initialisation complete"


if [[ $TYPE == "integrationtests" ]]; then
CREATED_RESULT_FILE="/var/www/testResults.xml"
Expand All @@ -75,7 +77,6 @@ else
exit 1
fi

echo "Start type $TYPE"
$commandPrefix -p $PROJECT_NAME up --abort-on-container-exit --exit-code-from $TYPE db minio redis backend worker nginx $TYPE
exit_code=$?
echo "Retrieve the $TYPE tests results."
Expand Down
86 changes: 78 additions & 8 deletions backend/entityservice/api_def/openapi.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,15 @@
# descriptions.
openapi: 3.0.0
info:
version: '1.3'
version: '1.13'
title: Entity Matching API
x-logo:
url: http://clkhash.readthedocs.io/en/latest/_static/logo.svg
contact:
name: 'Confidential Computing, Data61 | CSIRO'
email: [email protected]
url: https://github.com/data61/anonlink-entity-service

description: >-
Allows multiple organisations to carry out private record linkage -
without disclosing personally identifiable information.
Expand Down Expand Up @@ -86,6 +88,8 @@ info:
servers:
- url: https://anonlink.easd.data61.xyz/api/v1
description: default EASD cluster
- url: http://localhost:8851/api/v1
description: Local deployment

paths:
/status:
Expand Down Expand Up @@ -276,6 +280,44 @@ paths:
'503':
$ref: '#/components/responses/RateLimited'

'/projects/{project_id}/authorize-external-upload':
get:
operationId: entityservice.views.objectstore.authorize_external_upload
summary: Retrieve temporary object store credentials for uploading data
tags:
- Project
description: |
Returns a set of temporary security credentials that the client can use to upload data to the
object store.

A valid **upload token** is required to authorise this call. The returned *Temporary Object
Store Credentials* can be used with any S3 compatible client. For example by using `boto3` in
Python. The returned credentials are restricted to allow only uploading data to a particular path
in a particular bucket for a finite period (defaulting to 12 hours).

Note this feature may be disabled by the administrator; in this case the endpoint will return a
`500` server error.
parameters:
- $ref: '#/components/parameters/project_id'
- $ref: '#/components/parameters/token'
responses:
'201':
description: Temporary Object Store Credentials
content:
application/json:
schema:
$ref: '#/components/schemas/ObjectStoreCredentials'
'400':
$ref: '#/components/responses/BadRequest'
'403':
$ref: '#/components/responses/Unauthorized'
'404':
$ref: '#/components/responses/NotFound'
'500':
$ref: '#/components/responses/Error'
'503':
$ref: '#/components/responses/RateLimited'

'/projects/{project_id}/clks':
post:
operationId: entityservice.views.project.project_clks_post
Expand Down Expand Up @@ -327,12 +369,6 @@ paths:
oneOf:
- $ref: '#/components/schemas/CLKUpload'
- $ref: '#/components/schemas/CLKnBlockUpload'
# unfortunately connexion can not handle multiple different encoding types on an endpoint.
#application/octet-stream:
# schema:
# type: string
# format: binary

responses:
'201':
description: Data Uploaded
Expand Down Expand Up @@ -833,7 +869,7 @@ components:
minimum: 0
description:
type: string
description: oportunity to give those numbers some context, what are we counting here?
description: opportunity to give those numbers some context, what are we counting here?
relative:
type: number
format: double
Expand Down Expand Up @@ -1076,3 +1112,37 @@ components:
type: string
message:
type: string

ObjectStoreCredentials:
description: Temporary credentials allowing client to upload a file to an object store.
type: object
properties:
upload:
description: |
Configuration of object store to upload file/s to. Specifies the server, bucket and
the approved path. The attached credentials are restricted to only allow uploads to
this path.
type: object
properties:
endpoint:
type: string
description: Hostname and port of the object store, e.g. minio.anonlink.example.com:9000
bucket:
type: string
description: Target bucket
path:
type: string
description: Target path

credentials:
description: Object Store credentials (compatible with both AWS & MinIO)
type: object
properties:
AccessKeyId:
type: string
SecretAccessKey:
type: string
Expiration:
type: string
SessionToken:
type: string
4 changes: 2 additions & 2 deletions backend/entityservice/database/selections.py
Original file line number Diff line number Diff line change
Expand Up @@ -489,8 +489,8 @@ def get_all_objects_for_project(db, project_id):
WHERE dp = %s
""", [dp['id']], one=True)

if clk_file_ref is not None:
logger.info("blooming data file found: {}".format(clk_file_ref))
if clk_file_ref is not None and clk_file_ref['file'] is not None:
logger.info("upload record found: {}".format(clk_file_ref))
object_store_files.append(clk_file_ref['file'])

if result_type == "similarity_scores":
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import os
from time import sleep

import minio
import pytest

from entityservice.settings import Config as config

@pytest.fixture(scope='session')
def upload_restricted_minio_client():
    """Session-scoped Minio client authenticated as the upload-restricted account.

    Waits INITIAL_DELAY seconds (default 5) before connecting so dependent
    services have time to come up, then returns a client tagged with an
    app-info string identifying it as the restricted testing client.
    """
    startup_delay = int(os.getenv('INITIAL_DELAY', '5'))
    sleep(startup_delay)

    client = minio.Minio(
        config.UPLOAD_OBJECT_STORE_SERVER,
        config.UPLOAD_OBJECT_STORE_ACCESS_KEY,
        config.UPLOAD_OBJECT_STORE_SECRET_KEY,
        region='us-east-1',
        secure=False,
    )
    client.set_app_info("anonlink-restricted", "testing client")
    return client
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
"""
Testing:
- uploading over existing files
- using deleted credentials
- using expired credentials

"""
import io

import minio
from minio import Minio
import pytest
from minio.credentials import AssumeRoleProvider, Credentials

from entityservice.object_store import connect_to_object_store, connect_to_upload_object_store
from entityservice.settings import Config

# S3-style policy document (AWS policy grammar, Version 2012-10-17) used in
# the AssumeRole test below: the temporary credentials derived from it are
# limited to PutObject on keys under the 'uploads/2020/' prefix only.
restricted_upload_policy = """{
"Version": "2012-10-17",
"Statement": [
{
"Action": [
"s3:PutObject"
],
"Effect": "Allow",
"Resource": [
"arn:aws:s3:::uploads/2020/*"
],
"Sid": "Upload-access-to-specific-bucket-only"
}
]
}
"""


class TestAssumeRole:
    """Exercises MinIO's STS AssumeRole flow with the upload-restricted account."""

    def test_temp_credentials_minio(self):
        """Assuming a role under a narrower policy must further restrict access."""
        endpoint = Config.UPLOAD_OBJECT_STORE_SERVER
        bucket_name = "uploads"

        admin_client = connect_to_object_store()
        upload_client = connect_to_upload_object_store()
        if not admin_client.bucket_exists(bucket_name):
            admin_client.make_bucket(bucket_name)

        # The upload account may not enumerate buckets...
        with pytest.raises(minio.error.AccessDenied):
            upload_client.list_buckets()

        # ...but it is allowed to put an object.
        upload_client.put_object(bucket_name, 'testobject', io.BytesIO(b'data'), length=4)

        provider = AssumeRoleProvider(
            upload_client,
            Policy=restricted_upload_policy,
        )
        scoped_creds = Credentials(provider=provider)
        scoped_client = Minio(endpoint, credentials=scoped_creds, region='us-east-1', secure=False)

        with pytest.raises(minio.error.AccessDenied):
            scoped_client.list_buckets()

        # The same kind of put that succeeded with the broader account must now
        # be denied, because the assumed-role policy only covers '2020/*' keys.
        with pytest.raises(minio.error.AccessDenied):
            scoped_client.put_object(bucket_name, 'testobject2', io.BytesIO(b'data'), length=4)

        # This key sits under the path the policy whitelists, so it succeeds.
        scoped_client.put_object(bucket_name, '2020/testobject', io.BytesIO(b'data'), length=4)
34 changes: 29 additions & 5 deletions backend/entityservice/object_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,34 @@ def connect_to_object_store():
secure=False
)
logger.debug("Connected to minio")
if not mc.bucket_exists(config.MINIO_BUCKET):
logger.info("Creating bucket {}".format(config.MINIO_BUCKET))
mc.set_app_info("anonlink-client", "minio general client")
create_bucket(mc, config.MINIO_BUCKET)
return mc


def connect_to_upload_object_store():
    """
    Build a minio client authenticated as the upload-only account.

    Reads the upload object store server and credentials from settings and
    tags the client with an "anonlink-upload" app-info string. No bucket
    creation is attempted here.

    :return: a connected ``minio.Minio`` client for uploads.
    """
    upload_client = minio.Minio(
        config.UPLOAD_OBJECT_STORE_SERVER,
        config.UPLOAD_OBJECT_STORE_ACCESS_KEY,
        config.UPLOAD_OBJECT_STORE_SECRET_KEY,
        region="us-east-1",
        # NOTE(review): TLS disabled here — presumably terminated upstream; confirm.
        secure=False,
    )
    upload_client.set_app_info("anonlink-upload", "minio client for uploads")
    logger.debug("Connected to minio upload account")
    return upload_client


def create_bucket(minio_client, bucket):
    """Create *bucket* via *minio_client* if it does not already exist.

    Tolerates the race where another process creates the bucket between the
    existence check and the creation call: ``BucketAlreadyOwnedByYou`` is
    caught and logged rather than propagated.

    :param minio_client: a connected ``minio.Minio`` client.
    :param bucket: name of the bucket to ensure exists.
    """
    # The pasted diff interleaved the pre-change lines (which used the stale
    # names `mc` / `config.MINIO_BUCKET` and had dead code after `return mc`);
    # this is the post-change implementation evidenced by the added lines.
    if not minio_client.bucket_exists(bucket):
        logger.info("Creating bucket {}".format(bucket))
        try:
            minio_client.make_bucket(bucket)
        except minio.error.BucketAlreadyOwnedByYou:
            # Lost the creation race to a sibling process — not an error.
            logger.info("The bucket {} was already created.".format(bucket))
8 changes: 7 additions & 1 deletion backend/entityservice/settings.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
#!/usr/bin/env python3.4
"""
Config shared between the application backend and the celery workers.
"""
Expand Down Expand Up @@ -31,6 +30,13 @@ class Config(object):
MINIO_SECRET_KEY = os.getenv('MINIO_SECRET_KEY', '')
MINIO_BUCKET = os.getenv('MINIO_BUCKET', 'entityservice')

UPLOAD_OBJECT_STORE_ENABLED = os.getenv('UPLOAD_OBJECT_STORE_ENABLED', 'true').lower() == "true"
UPLOAD_OBJECT_STORE_STS_DURATION = int(os.getenv('UPLOAD_OBJECT_STORE_STS_DURATION', '43200'))
UPLOAD_OBJECT_STORE_SERVER = os.getenv('UPLOAD_OBJECT_STORE_SERVER', MINIO_SERVER)
UPLOAD_OBJECT_STORE_ACCESS_KEY = os.getenv('UPLOAD_OBJECT_STORE_ACCESS_KEY', '')
UPLOAD_OBJECT_STORE_SECRET_KEY = os.getenv('UPLOAD_OBJECT_STORE_SECRET_KEY', '')
UPLOAD_OBJECT_STORE_BUCKET = os.getenv('UPLOAD_OBJECT_STORE_BUCKET', 'anonlink-uploads')

DATABASE_SERVER = os.getenv('DATABASE_SERVER', 'db')
DATABASE = os.getenv('DATABASE', 'postgres')
DATABASE_USER = os.getenv('DATABASE_USER', 'postgres')
Expand Down
Loading