diff --git a/backend/entityservice/__init__.py b/backend/entityservice/__init__.py index 3f6e14bd..2bc13838 100644 --- a/backend/entityservice/__init__.py +++ b/backend/entityservice/__init__.py @@ -28,8 +28,9 @@ from entityservice.settings import Config as config from entityservice.utils import fmt_bytes, iterable_to_stream -con_app.add_api(pathlib.Path("swagger.yaml"), +con_app.add_api(pathlib.Path("openapi.yaml"), base_path='/', + options={'swagger_ui': False}, strict_validation=config.CONNEXION_STRICT_VALIDATION, validate_responses=config.CONNEXION_RESPONSE_VALIDATION) diff --git a/backend/entityservice/api_def/openapi.yaml b/backend/entityservice/api_def/openapi.yaml new file mode 100644 index 00000000..c23cf816 --- /dev/null +++ b/backend/entityservice/api_def/openapi.yaml @@ -0,0 +1,1078 @@ +# An OpenAPI 3.0 definition of the Entity Service API. +# +# This is used for generating API documentation and the types used by the +# server. +# +# Some style notes: +# - This file is used by ReDoc, which allows GitHub Flavored Markdown in +# descriptions. +openapi: 3.0.0 +info: + version: '1.3' + title: Entity Matching API + x-logo: + url: http://clkhash.readthedocs.io/en/latest/_static/logo.svg + contact: + name: 'Confidential Computing, Data61 | CSIRO' + email: confidential-computing@csiro.au + description: >- + Allows multiple organisations to carry out private record linkage - + without disclosing personally identifiable information. + + In general: + + * All parameters and returned objects are JSON with content-type set to + `"application/json"`. + + * All authentication tokens are 48 character hex strings. + + * The `Project`/`Run` resource identifiers are also 48 char hex strings. + + * HTTP status codes are used to distinguish server outcomes. + + + **Note:** Several important concepts including security considerations are described in + [separate documentation](./concepts.html). 
+ + ### Introduction + + This is a service that computes matches of entity records between two or more parties. + The personally identifiable information used for linking is first locally transformed + to anonymous linking codes called [Cryptographic Longterm Keys](concepts.html#cryptographic-longterm-key) + (CLKs). Each party then uploads its CLKs to the service for matching. + The service supports three different [types of outputs](concepts.html#result-types) + of matching results, varying in privacy properties. + For the CLKs to be comparable, the parties need to agree on and follow a particular + [linkage schema](concepts.html#schema) and agree on a shared secret before generating + and uploading the CLKs. + + + ### Security + + See the linked [security documentation](./security.html). + + + __Warning__: There are no restrictions on accessing the service, i.e., everyone can call the different endpoints. + However, authorization is required to access sensitive data. The authorization is described [here](security.html#auth). + + + Most endpoints require an authorization header to be sent along with any request. + + + ### Generating CLKs + + See the [anonlink-client](https://github.com/data61/anonlink-client) project for details and a command line + tool to locally generate CLKs from entity data to upload. + + + + ## Matching Protocols + + The Entity Service supports three different **result types** with varying privacy properties which define the produced result, + and who may see which part of the output. + + See the documentation section on [output types](./concepts.html#result-types). + + The options are: + + * `"permutations"` - Creates random permutations and a mask. + * `"similarity_scores"` - Outputs a list of similarity scores of `[indexA, indexB, score]`, where `score` + represents the likelihood that `indexA = indexB`. + * `"groups"` - Outputs a list of groups of records, where each group represents one entity.
+ + Only `"groups"` supports multi-party linkage. `"permutations"` and `"similarity_scores"` only support linkage + with two parties. + +servers: +- url: https://anonlink.easd.data61.xyz/api/v1 + description: default EASD cluster + +paths: + /status: + get: + operationId: entityservice.views.general.status_get + summary: Service status + tags: + - General + description: | + Suitable for using as load balancer health check. Checks that + redis cache and database are operational. Provides very high level system wide + metrics. + responses: + '200': + description: Service status. + content: + application/json: + schema: + type: object + properties: + status: + type: string + description: Will be "ok" if everything is working. + project_count: + description: Number of projects created on this server. + type: integer + rate: + type: integer + description: The most recent reported comparison rate of the system (comparisons per second). + example: + { + "status": "ok", + "project_count": 3118, + "rate": 1793757 + } + '500': + description: | + System is experiencing difficulties. + E.g. application can't connect to database. + /version: + get: + operationId: entityservice.views.general.version_get + summary: Version + tags: + - General + description: | + Return version numbers of libraries used. + responses: + '200': + description: Versions + content: + application/json: + schema: + type: object + properties: + anonlink: + type: string + entityservice: + type: string + python: + type: string + example: + {"anonlink": "0.6.3", "entityservice": "v1.7.2", "python": "3.6.4"} + /projects: + get: + operationId: entityservice.views.project.projects_get + summary: List of record linkage projects. + tags: + - Project + description: | + List of all privacy preserving record linkage projects. 
+ responses: + '200': + description: Successful response + content: + application/json: + schema: + $ref: '#/components/schemas/ProjectList' + post: + operationId: entityservice.views.project.projects_post + summary: Create and configure a new linkage project + tags: + - Project + description: | + Prepare resources and endpoints to accept CLK uploads from data providers. + Provides the caller with credentials that will be required for any further + interaction with this project. + + ### Schema + + Although these comprise the column names, the raw data will never be + sent to this entity service. + + Each participant will be able to see the schema to verify it is + what they expect. Schema details should have been determined and agreed + on by each party before starting a mapping task. This is documented + in [schema](./concepts.html#schema). + + ### Result Type + + The result type specifies what information is available after the entity + resolving process has completed. All project **runs** will use this result type. + + The parts of the computed linkage results that are accessible by the different tokens depend on the + `result_type`: + + - `"similarity_scores"`, or `"groups"`\ + If the `result_type` is `"similarity_scores"` or `"groups"` then the results can be accessed with the + `result_token`, which is provided when initially creating the mapping. + + - `"permutations"`\ + If the `result_type` is `permutations`, then the data providers can access their respective permutation with + their individual `receipt_token`, which they obtain when adding data to the mapping. + The mask can be accessed with the `result_token`. + + Only `"groups"` supports multi-party linkage. If the result type is `"similarity_scores"` or + `"permutations"`, then the number of parties must be 2.
+ + requestBody: + description: new project info + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/NewProject' + responses: + '201': + description: New project created + content: + application/json: + schema: + $ref: '#/components/schemas/NewProjectResponse' + '400': + $ref: "#/components/responses/BadRequest" + '403': + $ref: '#/components/responses/Unauthorized' + '503': + $ref: '#/components/responses/RateLimited' + + '/projects/{project_id}': + parameters: + - $ref: '#/components/parameters/token' + - $ref: '#/components/parameters/project_id' + + get: + operationId: entityservice.views.project.project_get + summary: Description of a particular linkage project + tags: + - Project + description: | + Details provided at project creation. + + responses: + '200': + description: Project description + content: + application/json: + schema: + $ref: '#/components/schemas/ProjectDescription' + '400': + $ref: '#/components/responses/BadRequest' + '403': + $ref: '#/components/responses/Unauthorized' + '404': + $ref: '#/components/responses/NotFound' + '500': + $ref: '#/components/responses/Error' + '503': + $ref: '#/components/responses/RateLimited' + + delete: + operationId: entityservice.views.project.project_delete + summary: Delete linkage project. + tags: + - Project + description: Remove the project and all associated data from the service. + responses: + '204': + description: Successfully deleted + '400': + $ref: '#/components/responses/BadRequest' + '403': + $ref: '#/components/responses/Unauthorized' + '404': + $ref: '#/components/responses/NotFound' + '500': + $ref: '#/components/responses/Error' + '503': + $ref: '#/components/responses/RateLimited' + + '/projects/{project_id}/clks': + post: + operationId: entityservice.views.project.project_clks_post + summary: Upload encoded PII data to a linkage project. + tags: + - Project + description: | + Called by each of the data providers with their calculated `CLK` vectors. 
+ The project must have been created, and the caller must have both the + `project_id` and a valid `upload_token` in order to contribute data. + + The data uploaded must be in one of the following formats. + - CLKs only upload: An array of base64 encoded [CLKs](./concepts.html#cryptographic-longterm-keys), one per + entity. + - CLKs with blocking information upload: An array of base64 encoded CLKs with corresponding blocking + information. One element in this array is an array with the first element being a base64 encoded CLK followed + by the block IDs the CLK corresponds to. + + When the last party successfully uploads their data any existing project runs are queued + for execution. + + The uploaded encodings must all have the same length in bytes. If the project's linkage schema + specifies an encoding size it will be checked and enforced before any runs are computed. Note a + minimum and maximum encoding size can be set at the server level at deployment time. + Currently anonlink requires this _encoding size_ to be a multiple of 8. An example value is 128 Bytes. + + Note in the default deployment the maximum request size is set to `~10 GB`, which __should__ + translate to just over 20 million entities. + + ### JSON Upload + + This endpoint can be used with the Content-Type: application/json and uses the `CLKUpload` + structure of a JSON array of base64 encoded strings. + + ### Binary Upload + + An additional api endpoint (/projects/{project_id}/binaryclks) has been added for uploading CLKs as a binary + file. This is to allow for faster and more efficient data transfer.
+ + parameters: + - $ref: '#/components/parameters/project_id' + - $ref: '#/components/parameters/token' + requestBody: + description: the encoded PII + required: true + content: + application/json: + schema: + oneOf: + - $ref: '#/components/schemas/CLKUpload' + - $ref: '#/components/schemas/CLKnBlockUpload' + # unfortunately connexion can not handle multiple different encoding types on an endpoint. + #application/octet-stream: + # schema: + # type: string + # format: binary + + responses: + '201': + description: Data Uploaded + content: + application/json: + schema: + $ref: '#/components/schemas/UploadReceipt' + '400': + $ref: '#/components/responses/BadRequest' + '403': + $ref: '#/components/responses/Unauthorized' + '404': + $ref: '#/components/responses/NotFound' + '500': + $ref: '#/components/responses/Error' + '503': + $ref: '#/components/responses/RateLimited' + + '/projects/{project_id}/binaryclks': + post: + operationId: entityservice.views.project.project_binaryclks_post + summary: Upload binary encoded PII data to a linkage project. + tags: + - Project + description: | + An experimental api for uploading CLKs as a binary file. This is to allow for + faster and more efficient data transfer. + Called by each of the data providers with their calculated `CLK` vectors. + The project must have been created, and the caller must have both the + `project_id` and a valid `upload_token` in order to contribute data. + + When the last party successfully uploads their data any existing project runs are queued + for execution. + + The uploaded encodings must all have the same length in bytes. If the project's linkage schema + specifies an encoding size it will be checked and enforced before any runs are computed. Note a + minimum and maximum encoding size can be set at the server level at deployment time. + Currently anonlink requires this _encoding size_ to be a multiple of 8. An example value is 128 Bytes.
+ + Additional headers are required: + + Content-Type: application/octet-stream + Hash-Count: + Hash-Size: + + The file is a series of CLK data: + + bit_packing_fmt = "!s" + + Where: + + "!" Use network byte order (big-endian). + "s" Store the 128 raw bytes of the bitarray. Default value is "128s". + + https://docs.python.org/3/library/struct.html#format-strings + + parameters: + - $ref: '#/components/parameters/project_id' + - $ref: '#/components/parameters/token' + - in: header + name: Hash-Count + required: true + schema: + type: integer + - in: header + name: Hash-Size + required: true + schema: + type: integer + requestBody: + description: the clks in binary + required: true + content: + application/octet-stream: + schema: + type: string + format: binary + + responses: + '201': + description: Data Uploaded + content: + application/json: + schema: + $ref: '#/components/schemas/UploadReceipt' + '400': + $ref: '#/components/responses/BadRequest' + '403': + $ref: '#/components/responses/Unauthorized' + '404': + $ref: '#/components/responses/NotFound' + '500': + $ref: '#/components/responses/Error' + '503': + $ref: '#/components/responses/RateLimited' + + '/projects/{project_id}/runs': + parameters: + - $ref: '#/components/parameters/project_id' + - $ref: '#/components/parameters/token' + get: + operationId: entityservice.views.run.list.get + summary: List runs + tags: + - Run + description: | + List of run summaries. + Requires project level authorization. 
+ + responses: + '200': + description: List of runs + content: + application/json: + schema: + $ref: '#/components/schemas/RunList' + '400': + $ref: '#/components/responses/BadRequest' + '403': + $ref: '#/components/responses/Unauthorized' + '404': + $ref: '#/components/responses/NotFound' + '500': + $ref: '#/components/responses/Error' + '503': + $ref: '#/components/responses/RateLimited' + + post: + operationId: entityservice.views.run.list.post + summary: Create a new run + tags: + - Run + description: | + Queue up a computation to compare entities. + parameters: + - $ref: '#/components/parameters/project_id' + - $ref: '#/components/parameters/token' + requestBody: + description: shiny new run + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/NewRun' + responses: + '201': + description: Created new run + content: + application/json: + schema: + $ref: '#/components/schemas/RunDescription' + '400': + $ref: '#/components/responses/BadRequest' + '403': + $ref: '#/components/responses/Unauthorized' + '404': + $ref: '#/components/responses/NotFound' + '500': + $ref: '#/components/responses/Error' + '503': + $ref: '#/components/responses/RateLimited' + + + '/projects/{project_id}/runs/{run_id}': + parameters: + - $ref: '#/components/parameters/project_id' + - $ref: '#/components/parameters/run_id' + - $ref: '#/components/parameters/token' + get: + operationId: entityservice.views.run.description.get + summary: Description of a particular run + tags: + - Run + description: | + This endpoint allows the caller to see what parameters a run was created with. + Also see the ``/status`` endpoint. 
+ responses: + '200': + description: Success + content: + application/json: + schema: + $ref: '#/components/schemas/RunDescription' + '400': + $ref: '#/components/responses/BadRequest' + '403': + $ref: '#/components/responses/Unauthorized' + '404': + $ref: '#/components/responses/NotFound' + '500': + $ref: '#/components/responses/Error' + '503': + $ref: '#/components/responses/RateLimited' + + delete: + operationId: entityservice.views.run.description.delete + summary: Delete a run + tags: + - Run + description: Cancel the run and remove all associated data from the service. + responses: + '204': + description: Successfully deleted + '400': + $ref: '#/components/responses/BadRequest' + '403': + $ref: '#/components/responses/Unauthorized' + '404': + $ref: '#/components/responses/NotFound' + '500': + $ref: '#/components/responses/Error' + '503': + $ref: '#/components/responses/RateLimited' + + '/projects/{project_id}/runs/{run_id}/status': + get: + operationId: entityservice.views.run.status.get + summary: Run status + tags: + - Run + description: | + A status update on a particular record linkage run. + + `'time_added'`, `'time_started'` and `'time_completed'` are represented in + [ISO 8601 format](https://docs.python.org/3/library/datetime.html#datetime.datetime.isoformat). 
+ + parameters: + - $ref: '#/components/parameters/project_id' + - $ref: '#/components/parameters/run_id' + - $ref: '#/components/parameters/token' + responses: + '200': + description: Successful response + content: + application/json: + schema: + $ref: '#/components/schemas/RunStatus' + '400': + $ref: '#/components/responses/BadRequest' + '403': + $ref: '#/components/responses/Unauthorized' + '404': + $ref: '#/components/responses/NotFound' + '500': + $ref: '#/components/responses/Error' + '503': + $ref: '#/components/responses/RateLimited' + + '/projects/{project_id}/runs/{run_id}/result': + get: + operationId: entityservice.views.run.results.get + summary: Run result + tags: + - Run + description: | + + ## Response + + The response schema depends on the mapping's `result_type`. OpenAPI doesn't + allow different schemas (in v2) so these are documented here: + + Note if the result isn't ready, a `404` will be returned. + + + ### result_type = "similarity_scores" + + The list of the pairwise similarity scores where the score + is greater than the similarity threshold. + Data is returned as a `json` object e.g., + + { + "similarity_scores": + [ + [[0, 5], [1, 27], 1.0], + [[1, 10], [0, 14], 1.0] + ] + } + + + Each element in the list is a list of three elements of the following format + `[[party_id_0, row_index_0], [party_id_1, row_index_1], score]`, where `[party_id_0, row_index_0]` + refers to the record at the index `row_index_0` from the dataset `party_id_0`, similarly for + `[party_id_1, row_index_1]`, and `score` is the similarity score representing the likelihood + that this pair of records is a match. + + `party_id_0`, `row_index_0`, `party_id_1` and `row_index_1` start from 0, and `party_id_0 != party_id_1` but + are not necessarily ordered. + + The value of `score` is between 0.0 and 1.0. The higher the score, the higher the similarity between + the compared CLKs.
+ + ### result_type = "permutations" + + The data providers will receive their respective permutation: + + + { + "permutation": [3,0,4,1,2], + "rows": 5 + } + + + The creator of the mapping gets access to the mask: + + { + "mask": [0,1,0,1,1] + } + + + In this example the first three elements in the original dataset are included, + but have been reordered to the second, fourth and fifth positions. The other elements + have been excluded with the mask. Note the permutation is specific to + the caller. Also any data after row 5 is to be discarded after the reordering has + been applied. + The mask is an array of 0/1 numbers. + + ### result_type = "groups" + + A list of groups of records, where each group represents one entity. A record is a + 2-tuple of the dataset index and record index within that dataset. + + For example: + {"groups": [[[0, 6], [1, 3], [3, 1]], + [[0, 8], [2, 6]], + [[0, 3], [3, 4]]]} + + Here, we have three groups. The first group contains three records and the other two + groups each contain two records. + + In the first group, [0, 6] is the 7th record of the 1st dataset (because we're using + 0-indexing). A particular record will appear in at most one group. + + In this example, there are many records that are not matched to any other records (for + example [0, 5]). These trivial groups are omitted.
+ + + parameters: + - $ref: '#/components/parameters/project_id' + - $ref: '#/components/parameters/run_id' + - $ref: '#/components/parameters/token' + responses: + '200': + description: Successful response + '400': + $ref: '#/components/responses/BadRequest' + '403': + $ref: '#/components/responses/Unauthorized' + '404': + $ref: '#/components/responses/NotFound' + '500': + $ref: '#/components/responses/Error' + '503': + $ref: '#/components/responses/RateLimited' + +components: + parameters: + token: + required: true + schema: + type: string + name: Authorization + in: header + project_id: + in: path + name: "project_id" + description: Resource identifier of the record linkage project + required: true + schema: + type: string + run_id: + in: path + name: "run_id" + description: Resource identifier of a particular run in a record linkage project + required: true + schema: + type: string + + # Descriptions of common responses + responses: + Error: + description: An unspecified error occurred + content: + application/json: + schema: + $ref: '#/components/schemas/Problem' + NotFound: + description: The specified resource was not found + content: + application/json: + schema: + $ref: "#/components/schemas/Problem" + BadRequest: + description: Client's request could not be processed. + content: + application/json: + schema: + $ref: "#/components/schemas/Problem" + Unauthorized: + description: Authentication token could be missing, or invalid. + content: + application/json: + schema: + $ref: "#/components/schemas/Problem" + RateLimited: + description: Client is rate limited. Try requesting less frequently. + + + schemas: + + Problem: + type: object + properties: + type: + type: string + format: uri + description: | + An absolute URI that identifies the problem type. When dereferenced, + it SHOULD provide human-readable documentation for the problem type + (e.g., using HTML).
+ default: 'about:blank' + example: 'http://anonlink.readthedocs.io/en/latest/problems.html#ratelimited' + title: + type: string + description: | + A short summary of the problem type. Written in English and readable + for engineers; example: Service Unavailable + status: + type: integer + format: int32 + description: | + The HTTP status code generated by the origin server for this occurrence + of the problem. + minimum: 100 + maximum: 600 + exclusiveMaximum: true + example: 503 + detail: + type: string + description: | + A human readable explanation specific to this occurrence of the + problem. + example: Connection to database timed out + instance: + type: string + format: uri + description: | + An absolute URI that identifies the specific occurrence of the problem. + It may or may not yield further information if dereferenced. + + ProjectList: + type: array + items: + type: object + description: Listing of projects. + properties: + project_id: + type: string + time_added: + type: string + description: ISO8601 formatted datetime + format: 'date-time' + + Schema: + type: object + description: | + The `schema` is described in the [clkhash](http://clkhash.readthedocs.io/en/latest/schema.html) + documentation. + + A json-schema definition can be found [here](http://clkhash.readthedocs.io/en/latest/_static/schema.json#hashing-schema) + + ResultType: + type: string + description: | + Defines the output type of the mapping. Multi-party linkage requires `"groups"` to be used. + enum: + - groups + - permutations + - similarity_scores + + RunState: + type: string + description: What state is the run in + enum: + - created + - queued + - running + - completed + - error + + RunProgress: + description: Details how far completed the run is.
+ type: object + properties: + absolute: + description: The progress of this stage in absolute terms + type: number + format: int + minimum: 0 + description: + type: string + description: opportunity to give those numbers some context, what are we counting here? + relative: + type: number + format: double + minimum: 0.0 + maximum: 1.0 + description: | + Decimal between 0.0 and 1.0 indicating the relative progress of this stage. + required: + - absolute + - relative + + NewProject: + description: Configuration for a new linkage project. + type: object + x-body-name: project + properties: + schema: + $ref: '#/components/schemas/Schema' + result_type: + $ref: '#/components/schemas/ResultType' + number_parties: + description: | + How many data providers will participate in this project. Default value is 2. + If there are more than two data providers, then `result_type` must be + `"groups"`. + type: integer + minimum: 2 + name: + description: Any free text name for this project. + type: string + notes: + description: Any free text to store with this project. + type: string + required: + - schema + - result_type + + ProjectDescription: + allOf: + - $ref: '#/components/schemas/NewProject' + - type: object + properties: + project_id: + type: string + parties_contributed: + type: number + error: + type: boolean + + NewProjectResponse: + properties: + project_id: + type: string + update_tokens: + type: array + description: Array of unique tokens authorizing the upload of CLKs. + items: + type: string + result_token: + description: A token required to access mapping results. + type: string + + NewRun: + description: Configuration for a new run doing the linkage calculation + x-body-name: run + type: object + properties: + threshold: + type: number + format: double + description: | + The similarity threshold (a number between 0 and 1) above which two entities will be considered a match.
+ notes: + type: string + description: | + Some optional text that we store along with the run. + name: + type: string + description: | + Some short human readable name that we store along with the run. + + required: + - threshold + + RunDescription: + allOf: + - $ref: '#/components/schemas/NewRun' + - type: object + properties: + run_id: + type: string + + RunList: + type: array + items: + type: object + description: Listing of runs. + properties: + run_id: + type: string + time_added: + type: string + description: ISO8601 formatted datetime + format: 'date-time' + state: + $ref: '#/components/schemas/RunState' + + RunStatus: + type: object + discriminator: + propertyName: state + properties: + time_added: + type: string + format: 'date-time' + state: + type: string + description: What state is the run in + enum: + - created + - queued + - running + - completed + - error + stages: + type: number + format: int + minimum: 1 + description: total number of stages for this run + current_stage: + $ref: '#/components/schemas/RunStage' + required: + - time_added + - state + - stages + - current_stage + + completed: + allOf: + - $ref: '#/components/schemas/RunStatus' + - type: object + properties: + time_started: + type: string + format: 'date-time' + time_completed: + type: string + format: 'date-time' + required: + - time_started + - time_completed + + error: + description: Describes the errored Run. Must contain a 'message', can provide more detail in 'detail'.
+ allOf: + - $ref: '#/components/schemas/RunStatus' + - type: object + properties: + message: + type: string + description: short error message + detail: + type: string + description: detailed error message + required: + - message + + queued: + allOf: + - $ref: '#/components/schemas/RunStatus' + - type: object + properties: + time_started: + type: string + format: 'date-time' + + started: + allOf: + - $ref: '#/components/schemas/queued' + + running: + allOf: + - $ref: '#/components/schemas/RunStatus' + - type: object + properties: + time_started: + type: string + format: 'date-time' + required: + - time_started + + RunStage: + description: describes the current stage of a run + type: object + properties: + number: + type: number + minimum: 1 + description: the number of the current stage, first stage has number 1 + description: + type: string + description: describes the current stage in human readable form + progress: + $ref: '#/components/schemas/RunProgress' + required: + - number + + CLKUpload: + description: Object that contains this party's Bloom Filters + type: object + required: [clks] + properties: + clks: + type: array + items: + type: string + format: byte + description: Base64 encoded CLK data + + CLKnBlockUpload: + description: Object that contains this party's Bloom Filters including blocking information + type: object + required: [clknblocks] + properties: + clknblocks: + type: array + items: + type: array + items: + anyOf: + - type: string + format: byte + description: Base64 encoded CLK data + - type: string + - type: integer + + UploadReceipt: + properties: + receipt_token: + type: string + message: + type: string diff --git a/backend/entityservice/api_def/swagger.yaml b/backend/entityservice/api_def/swagger.yaml deleted file mode 100644 index d129009c..00000000 --- a/backend/entityservice/api_def/swagger.yaml +++ /dev/null @@ -1,966 +0,0 @@ -# A Swagger 2.0 (a.k.a. OpenAPI) definition of the Entity Service API. 
-# -# This is used for generating API documentation and the types used by the -# server. -# -# Some style notes: -# - This file is used by ReDoc, which allows GitHub Flavored Markdown in -# descriptions. -swagger: '2.0' -info: - version: '1.2' - title: Entity Matching API - x-logo: - url: http://clkhash.readthedocs.io/en/latest/_static/logo.svg - contact: - name: 'Confidential Computing, Data61 | CSIRO' - email: support@n1analytics.com - description: >- - Allows two organisations to carry out private record linkage - - without disclosing personally identifiable information. - - In general: - - * All parameters and returned objects are JSON with content-type set to - `"application/json"`. - - * All authentication tokens are 48 character hex strings. - - * The `Project`/`Run` resource identifiers are also 48 char hex strings. - - * HTTP status codes are used to distinguish server outcomes. - - - **Note:** Several important concepts including security considerations are described in - [separate documentation](./concepts.html). - - ### Introduction - - This is a service that computes matches of entity records between two parties. - The personally identifiable information used for linking is first locally transformed - to anonymous linking codes called [Cryptographic Longterm Keys](concepts.html#cryptographic-longterm-key) - (CLKs). Each party then uploads its CLKs to the service for matching. - The service supports three different [types of outputs](concepts.html#result-types) - of matching results, varying in privacy properties. - For the CLKs to be comparable, the parties need to agree on and follow a particular - [linkage schema](concepts.html#schema) and agree on a shared secret before generating - and uploading the CLKs. - - - ### Security - - See the linked [security documentation](./security.html). - - - __Warning__: There are no restrictions on accessing the service, i.e., everyone can call the different endpoints. 
- However, authorization is required to access sensitive data. The authorization is described [here](security.html#auth). - - - Most endpoints require an authorization header to be sent along with any request. - - - ### Generating CLKs - - See the [clkhash](https://github.com/n1analytics/clkhash) project for details and a command line - tool to locally generate CLKs from entity data to upload. - - - - ## Matching Protocols - - The Entity Service supports three different **result types** with varying privacy properties which define the produced result, - and who may see the which part of the output. - - See the documentation section on [output types](./concepts.html#result-types). - - The options are: - - * `"permutations"` - Creates random permutations and a mask. - * `"similarity_scores"` - Outputs a list of similarity scores of `[indexA, indexB, score]`, where `score` - represents the likelihood that `indexA = indexB`. - * `"groups"` - Outputs a list of groups of records, where each group represents one entity. - - Only `"groups"` supports multi-party linkage. `"permutations"` and `"similarity_scores"` only support linkage - with two parties. - - - -host: es.data61.xyz -schemes: - - "https" -basePath: /api/v1 -produces: - - application/json -consumes: - - application/json -paths: - /status: - get: - operationId: entityservice.views.general.status_get - summary: Service status - tags: - - General - description: | - Suitable for using as load balancer health check. Checks that - redis cache and database are operational. Provides very high level system wide - metrics. - responses: - '200': - description: Service status. - schema: - type: object - properties: - status: - type: string - description: Will be "ok" if everything is working. - project_count: - description: Number of projects created on this server. - type: integer - rate: - type: integer - description: The most recent reported comparison rate of the system (comparisons per second). 
- examples: - application/json: - - status: "ok" - number_mappings: 3118 - rate: 1793757 - '500': - description: | - System is experiencing difficulties. - E.g. application can't connect to database. - /version: - get: - operationId: entityservice.views.general.version_get - summary: Version - tags: - - General - description: | - Return version numbers of libraries used. - responses: - '200': - description: Versions - schema: - type: object - properties: - anonlink: - type: string - entityservice: - type: string - python: - type: string - examples: - application/json: - {"anonlink": "0.6.3", "entityservice": "v1.7.2", "python": "3.6.4"} - /projects: - get: - operationId: entityservice.views.project.projects_get - summary: List of record linkage projects. - tags: - - Project - description: | - List of all privacy preserving record linkage projects. - responses: - '200': - description: Successful response - schema: - $ref: '#/definitions/ProjectList' - post: - operationId: entityservice.views.project.projects_post - summary: Create and configure a new linkage project - tags: - - Project - description: | - Prepare resources and endpoints to accept CLK uploads from data providers. - Provides the caller with credentials that will be required for any further - interaction with this project. - - ### Schema - - Although these comprise the column names, the raw data will never be - sent to this entity service. - - Each participant will be able to see the schema to verify it is - what they expect. Schema details should have been determined and agreed - on by each party before starting a mapping task. This is documented - in [schema](./concepts.html#schema). - - ### Result Type - - The result type specifies what information is available after the entity - resolving process has completed. All project **runs** will use this result type. 
- - The parts of the computed linkage results that are accessable by the different tokens depends on the - `result_type`: - - - `"similarity_scores"`, or `"groups"`\ - If the `result_type` is `"similarity_scores"` or `"groups"` then the results can be accessed with the - `result_token``token`, which is provided when initially creating the mapping. - - - `"permutations"`\ - If the `result_type` is `permutations`, then the data providers can access their respective permutation with - their individual `receipt_token`, which they obtain when adding data to the mapping. - The mask can be accessed with the `result_token`. - - Only `"groups"` supports multi-party linkage. If the result type is `"similarity_scores"` or - `"permutations"`, then the number of parties must be 2. - - parameters: - - in: body - name: project - required: true - schema: - $ref: '#/definitions/NewProject' - responses: - '201': - description: New project created - schema: - $ref: '#/definitions/NewProjectResponse' - '400': - $ref: '#/responses/BadRequest' - '403': - $ref: '#/responses/Unauthorized' - '503': - $ref: '#/responses/RateLimited' - - '/projects/{project_id}': - parameters: - - $ref: '#/parameters/token' - - $ref: '#/parameters/project_id' - - get: - operationId: entityservice.views.project.project_get - summary: Description of a particular linkage project - tags: - - Project - description: | - Details provided at project creation. - - responses: - '200': - description: Project description - schema: - $ref: '#/definitions/ProjectDescription' - '400': - $ref: '#/responses/BadRequest' - '403': - $ref: '#/responses/Unauthorized' - '404': - $ref: '#/responses/NotFound' - '500': - $ref: '#/responses/Error' - '503': - $ref: '#/responses/RateLimited' - - delete: - operationId: entityservice.views.project.project_delete - summary: Delete linkage project. - tags: - - Project - description: Remove the project and all associated data from the service. 
- responses: - '204': - description: Successfully deleted - '400': - $ref: '#/responses/BadRequest' - '403': - $ref: '#/responses/Unauthorized' - '404': - $ref: '#/responses/NotFound' - '500': - $ref: '#/responses/Error' - '503': - $ref: '#/responses/RateLimited' - - '/projects/{project_id}/clks': - post: - operationId: entityservice.views.project.project_clks_post - summary: Upload encoded PII data to a linkage project. - consumes: - - application/json - - application/octet-stream - tags: - - Project - description: | - Called by each of the data providers with their calculated `CLK` vectors. - The project must have been created, and the caller must have both the - `project_id` and a valid `upload_token` in order to contribute data. - - The data uploaded is one [CLK](./concepts.html#cryptographic-longterm-keys) per entity/row. - - When the last party successfully uploads their data any existing project runs are queued - for execution. - - The uploaded encodings must all have the same length in bytes. If the project's linkage schema - specifes an encoding size it will be checked and enforced before any runs are computed. Note a - minimum and maximum encoding size can be set at the server level at deployment time. - Currently anonlink requires this _encoding size_ to be a multiple of 8. An example value is 128 Bytes. - - Note in the default deployment the maximum request size is set to `~10 GB`, which __should__ - translate to just over 20 million entities. - - ### JSON Upload - - This endpoint can be used with the Content-Type: application/json and uses the `CLKUpload` - structure of a JSON array of base64 encoded strings. - - ### Binary Upload - - An experimental api has been added for uploading CLKs as a binary file. This is to allow for - faster and more efficient data transfer. 
- - Additional headers are required: - - Content-Type: application/octet-stream - Hash-Count: - Hash-Size: - - The file is a series of CLK data: - - bit_packing_fmt = "!s" - - Where: - - "!" Use network byte order (big-endian). - "s" Store the 128 raw bytes of the bitarray. Default value is "128s". - - https://docs.python.org/3/library/struct.html#format-strings - - parameters: - - $ref: '#/parameters/project_id' - - $ref: '#/parameters/token' - - in: body - name: clks - required: true - schema: - $ref: '#/definitions/CLKUpload' - responses: - '201': - description: Data Uploaded - schema: - $ref: '#/definitions/UploadReceipt' - '400': - $ref: '#/responses/BadRequest' - '403': - $ref: '#/responses/Unauthorized' - '404': - $ref: '#/responses/NotFound' - '500': - $ref: '#/responses/Error' - '503': - $ref: '#/responses/RateLimited' - - '/projects/{project_id}/runs': - parameters: - - $ref: '#/parameters/project_id' - - $ref: '#/parameters/token' - get: - operationId: entityservice.views.run.list.get - summary: List runs - tags: - - Run - description: | - List of run summaries. - Requires project level authorization. - - responses: - '200': - description: List of runs - schema: - $ref: '#/definitions/RunList' - '400': - $ref: '#/responses/BadRequest' - '403': - $ref: '#/responses/Unauthorized' - '404': - $ref: '#/responses/NotFound' - '500': - $ref: '#/responses/Error' - '503': - $ref: '#/responses/RateLimited' - - post: - operationId: entityservice.views.run.list.post - summary: Create a new run - tags: - - Run - description: | - Queue up a computation to compare entities. 
- parameters: - - $ref: '#/parameters/project_id' - - $ref: '#/parameters/token' - - in: body - name: run - required: true - schema: - $ref: '#/definitions/NewRun' - responses: - '201': - description: Created new run - schema: - $ref: '#/definitions/RunDescription' - '400': - $ref: '#/responses/BadRequest' - '403': - $ref: '#/responses/Unauthorized' - '404': - $ref: '#/responses/NotFound' - '500': - $ref: '#/responses/Error' - '503': - $ref: '#/responses/RateLimited' - - - '/projects/{project_id}/runs/{run_id}': - parameters: - - $ref: '#/parameters/project_id' - - $ref: '#/parameters/run_id' - - $ref: '#/parameters/token' - get: - operationId: entityservice.views.run.description.get - summary: Description of a particular run - tags: - - Run - description: | - This endpoint allows the caller to see what parameters a run was created with. - Also see the ``/status`` endpoint. - responses: - '200': - description: Success - schema: - $ref: '#/definitions/RunDescription' - '400': - $ref: '#/responses/BadRequest' - '403': - $ref: '#/responses/Unauthorized' - '404': - $ref: '#/responses/NotFound' - '500': - $ref: '#/responses/Error' - '503': - $ref: '#/responses/RateLimited' - - delete: - operationId: entityservice.views.run.description.delete - summary: Delete a run - tags: - - Run - description: Cancel the run and remove all associated data from the service. - responses: - '204': - description: Successfully deleted - '400': - $ref: '#/responses/BadRequest' - '403': - $ref: '#/responses/Unauthorized' - '404': - $ref: '#/responses/NotFound' - '500': - $ref: '#/responses/Error' - '503': - $ref: '#/responses/RateLimited' - - '/projects/{project_id}/runs/{run_id}/status': - get: - operationId: entityservice.views.run.status.get - summary: Run status - tags: - - Run - description: | - A status update on a particular record linkage run. 
- - `'time_added'`, `'time_started'` and `'time_completed'` are represented in - [ISO 8601 format](https://docs.python.org/3/library/datetime.html#datetime.datetime.isoformat). - - parameters: - - $ref: '#/parameters/project_id' - - $ref: '#/parameters/run_id' - - $ref: '#/parameters/token' - responses: - '200': - description: Successful response - schema: - $ref: '#/definitions/RunStatus' - '400': - $ref: '#/responses/BadRequest' - '403': - $ref: '#/responses/Unauthorized' - '404': - $ref: '#/responses/NotFound' - '500': - $ref: '#/responses/Error' - '503': - $ref: '#/responses/RateLimited' - - '/projects/{project_id}/runs/{run_id}/result': - get: - operationId: entityservice.views.run.results.get - summary: Run result - tags: - - Run - description: | - - ## Response - - The response schema depends on the mapping's `result_type`. OpenAPI doesn't - allow different schemas (in v2) so these are documented here: - - Note if the result isn't ready, a `404` will be returned. - - - ### result_type = "similarity_scores" - - The list of the pairwise similarity scores where the score - is greater than the similarity threshold. - Data is returned as `json` object e.g., - - { - "similarity_scores": - [ - [[0, 5], [1, 27], 1.0], - [[1, 10], [0, 14], 1.0] - ] - } - - - The element in the list is a list of three elements of the following format - `[[party_id_0, row_index_0], [party_id_1, row_index_1], score]`, where `[party_id_0, row_index_0]` - refers to the record at the index `row_index_0` from the dataset `party_id_0`, similarly for - `[party_id_1, row_index_1]`, and `score` is the similarity score representing the likelihood - that this pair or records is a match. - - `ds_index_0`, `rec_index_0, `ds_index_1` and `rec_index_1` start from 0, and `party_id_0 != party_id_1` but - are not necessarilly ordered. - - The value of `score` is between 0.0 and 1.0. The higher the score, the higher the similarity between - the compared CLKs. 
- - ### result_type = "permutations" - - The data providers will receive their respective permutation: - - - { - "permutation": [3,0,4,1,2], - "rows": 5 - } - - - The creator of the mapping gets access to the mask: - - { - "mask": [0,1,0,1,1] - } - - - In this example the first three elements in the original dataset are included, - but have been reordered to the second, fourth and fifth positions. The other elements - have been excluded with the mask. Note the permutation is specific to - the caller. Also any data after row 5 is to be discarded after the reordering has - been applied. - The mask is an array of 0/1 numbers. - - ### result_type = "groups" - - A list of groups of records, where each group represents one entity. A record is a - 2-tuple of the dataset index and record index within that dataset. - - For example: - {"groups": [[[0, 6], [1, 3], [3, 1]], - [[0, 8], [2, 6]], - [[0, 3], [3, 4]]]} - - Here, we have three groups. The first group contains three records and the other two - groups each contain two records. - - In the first group, [0, 6] is the 7th record of the 1st dataset (because we're using) - 0-indexing. A particular record will appear in at most one group. - - In this example, there are many records that are not matched to any other records (for - example [0, 5]). These trivial groups are omitted. 
- - - parameters: - - $ref: '#/parameters/project_id' - - $ref: '#/parameters/run_id' - - $ref: '#/parameters/token' - responses: - '200': - description: Successful response - '400': - $ref: '#/responses/BadRequest' - '403': - $ref: '#/responses/Unauthorized' - '404': - $ref: '#/responses/NotFound' - '500': - $ref: '#/responses/Error' - '503': - $ref: '#/responses/RateLimited' - -parameters: - token: - required: true - type: string - name: Authorization - in: header - project_id: - in: path - name: "project_id" - description: Resource identifier of the record linkage project - required: true - type: string - run_id: - in: path - name: "run_id" - description: Resource identifier of a particular run in a record linkage project - required: true - type: string - -# Descriptions of common responses -responses: - Error: - description: An unspecified error occured - schema: - $ref: '#/definitions/Problem' - NotFound: - description: The specified resource was not found - schema: - $ref: "#/definitions/Problem" - BadRequest: - description: Client's request could not be processed. - schema: - $ref: "#/definitions/Problem" - Unauthorized: - description: Authentication token could be missing, or invalid. - schema: - $ref: "#/definitions/Problem" - RateLimited: - description: Client is rate limited. Try requesting less frequently. - - -definitions: - - Problem: - type: object - properties: - type: - type: string - format: uri - description: | - An absolute URI that identifies the problem type. When dereferenced, - it SHOULD provide human-readable documentation for the problem type - (e.g., using HTML). - default: 'about:blank' - example: 'http://anonlink.readthedocs.io/en/latest/problems.html#ratelimited' - title: - type: string - description: | - A short, summary of the problem type. 
Written in english and readable - for engineers; example: Service Unavailable - status: - type: integer - format: int32 - description: | - The HTTP status code generated by the origin server for this occurrence - of the problem. - minimum: 100 - maximum: 600 - exclusiveMaximum: true - example: 503 - detail: - type: string - description: | - A human readable explanation specific to this occurrence of the - problem. - example: Connection to database timed out - instance: - type: string - format: uri - description: | - An absolute URI that identifies the specific occurrence of the problem. - It may or may not yield further information if dereferenced. - - ProjectList: - type: array - items: - type: object - description: Listing of projects. - properties: - project_id: - type: string - time_added: - type: string - description: ISO8601 formated datetime - format: 'date-time' - - Schema: - type: object - description: | - The `schema` is described in the [clkhash](http://clkhash.readthedocs.io/en/latest/schema.html) - documentation. - - A json-schema definition can be found [here](http://clkhash.readthedocs.io/en/latest/_static/schema.json#hashing-schema) - - ResultType: - type: string - description: | - Defines the output type of the mapping. Multi-party linkage requires `"groups"` to be used. - enum: - - groups - - permutations - - similarity_scores - - RunState: - type: string - description: What state is the run in - enum: - - created - - queued - - running - - completed - - error - - RunProgress: - description: Details how far completed the run is. - type: object - properties: - absolute: - description: The progress of this stage in absolute terms - type: number - format: int - minimum: 0 - description: - type: string - description: oportunity to give those numbers some context, what are we counting here? 
- relative: - type: number - format: double - minimum: 0.0 - maximum: 1.0 - description: | - Decimal between 0.0 and 1.0 indicating the relative progress of this stage. - required: - - absolute - - relative - - NewProject: - description: Configuration for a new linkage project. - type: object - properties: - schema: - $ref: '#/definitions/Schema' - result_type: - $ref: '#/definitions/ResultType' - number_parties: - description: | - How many data providers will participate in this project. Default value is 2. - If there are more than two data providers, then `result_type` must be - `"groups"`. - type: number - minimum: 2 - name: - description: Any free text name for this project. - type: string - notes: - description: Any free text to store with this project. - type: string - uses_blocking: - type: boolean - description: | - Whether the linkage uses blocking. This requires that the CLKs are uploaded with corresponding blocking - information. - required: - - schema - - result_type - - ProjectDescription: - allOf: - - $ref: '#/definitions/NewProject' - - type: object - properties: - project_id: - type: string - parties_contributed: - type: number - error: - type: boolean - - - NewProjectResponse: - properties: - project_id: - type: string - update_tokens: - type: array - description: Array of unique tokens authorizing the upload of CLKs. - items: - type: string - result_token: - description: A token required to access mapping results. - type: string - - NewRun: - description: Configuration for a new run doing the linkage calculation - type: object - properties: - threshold: - type: number - format: double - description: | - The similarity threshold (a number between 0 and 1) above which two entites will be considered a match. - notes: - type: string - description: | - Some optional text that we store along with the run. - name: - type: string - description: | - Some short human readable name that we store along with the run. 
- - required: - - threshold - - RunDescription: - allOf: - - $ref: '#/definitions/NewRun' - - type: object - properties: - run_id: - type: string - - RunList: - type: array - items: - type: object - description: Listing of runs. - properties: - run_id: - type: string - time_added: - type: string - description: ISO8601 formated datetime - format: 'date-time' - state: - $ref: '#/definitions/RunState' - - RunStatus: - type: object - discriminator: state - properties: - time_added: - type: string - format: 'date-time' - state: - type: string - description: What state is the run in - enum: - - created - - queued - - running - - completed - - error - stages: - type: number - format: int - minimum: 1 - description: total number of stages for this run - current_stage: - $ref: '#/definitions/RunStage' - required: - - time_added - - state - - stages - - current_stage - - completed: - allOf: - - $ref: '#/definitions/RunStatus' - - type: object - properties: - time_started: - type: string - format: 'date-time' - time_completed: - type: string - format: 'date-time' - required: - - time_started - - time_completed - - error: - description: Describes the errored Run. Must contain a 'message', can provide more detail in 'detail'. 
- allOf: - - $ref: '#/definitions/RunStatus' - - type: object - properties: - message: - type: string - description: short error message - detail: - type: string - description: detailed error message - required: - - message - - - queued: - allOf: - - $ref: '#/definitions/RunStatus' - - type: object - properties: - time_started: - type: string - format: 'date-time' - - - started: - allOf: - - $ref: '#/definitions/queued' - - - running: - allOf: - - $ref: '#/definitions/RunStatus' - - type: object - properties: - time_started: - type: string - format: 'date-time' - required: - - time_started - - - RunStage: - description: describes the current stage of a run - type: object - properties: - number: - type: number - minimum: 1 - description: the number of the current stage, first stage has number 1 - description: - type: string - description: describes the current stage in human readable form - progress: - $ref: '#/definitions/RunProgress' - required: - - number - - CLKUpload: - description: Array of this party's Bloom Filters - type: object - required: [clks] - properties: - clks: - type: array - items: - type: string - format: byte - description: Base64 encoded CLK data - - UploadReceipt: - properties: - receipt_token: - type: string - message: - type: string diff --git a/backend/entityservice/tests/util.py b/backend/entityservice/tests/util.py index e80c634a..afb14724 100644 --- a/backend/entityservice/tests/util.py +++ b/backend/entityservice/tests/util.py @@ -170,7 +170,7 @@ def create_project_upload_data( new_project_data = create_project_no_data( requests, result_type=result_type, number_parties=number_parties) - upload_url = url + f'/projects/{new_project_data["project_id"]}/clks' + upload_url = url + f'/projects/{new_project_data["project_id"]}/{"binary" if binary else ""}clks' json_responses = [] for clks, update_token in zip(data, new_project_data['update_tokens']): if binary: @@ -405,7 +405,7 @@ def is_run_status(status): def upload_binary_data(requests, 
data, project_id, token, count, size=128, expected_status_code=201): r = requests.post( - url + '/projects/{}/clks'.format(project_id), + url + '/projects/{}/binaryclks'.format(project_id), headers={ 'Authorization': token, 'Content-Type': 'application/octet-stream', diff --git a/backend/entityservice/views/project.py b/backend/entityservice/views/project.py index 41e1b22e..86d6d509 100644 --- a/backend/entityservice/views/project.py +++ b/backend/entityservice/views/project.py @@ -98,6 +98,82 @@ def project_get(project_id): return ProjectDescription().dump(project_object) +def project_binaryclks_post(project_id): + """ + Accept a data provider's binary CLK upload (Content-Type application/octet-stream); returns ({'message': 'Updated', 'receipt_token': ...}, 201). Calls safe_fail_request with 401/403/400 for a missing token, a token already used to upload, or a size/content-type problem; an exception in the upload step sets the upload state to 'error' and is re-raised. + """ + log = logger.bind(pid=project_id) + headers = request.headers + + parent_span = g.flask_tracer.get_span() + + with opentracing.tracer.start_span('check-auth', child_of=parent_span) as span: + abort_if_project_doesnt_exist(project_id) + if headers is None or 'Authorization' not in headers: + safe_fail_request(401, message="Authentication token required") + + token = headers['Authorization'] + + # Check the caller has valid token -> otherwise 403 + abort_if_invalid_dataprovider_token(token) + + with DBConn() as conn: + dp_id = db.get_dataprovider_id(conn, token) + project_encoding_size = db.get_project_schema_encoding_size(conn, project_id) + upload_state_updated = db.is_dataprovider_allowed_to_upload_and_lock(conn, dp_id) + + if not upload_state_updated: + return safe_fail_request(403, "This token has already been used to upload clks.") + + log = log.bind(dp_id=dp_id) + log.info("Receiving CLK data.") + receipt_token = None + + with opentracing.tracer.start_span('upload-clk-data', child_of=parent_span) as span: + span.set_tag("project_id", project_id) + try: + if headers['Content-Type'] == "application/octet-stream": + span.set_tag("content-type", 'binary') + log.info("Handling binary CLK upload") + try: + count, size = check_binary_upload_headers(headers) + log.info(f"Headers tell us to expect 
{count} encodings of {size} bytes") + span.log_kv({'count': count, 'size': size}) + except Exception: + log.warning("Upload failed due to problem with headers in binary upload") + raise + # Check against project level encoding size (if it has been set) + if project_encoding_size is not None and size != project_encoding_size: + # fail fast - we haven't stored the encoded data yet + return safe_fail_request(400, "Upload 'Hash-Size' doesn't match project settings") + + # TODO actually stream the upload data straight to Minio. Currently we can't because + # connexion has already read the data before our handler is called! + # https://github.com/zalando/connexion/issues/592 + # stream = get_stream() + stream = BytesIO(request.data) + expected_bytes = binary_format(size).size * count + log.debug(f"Stream size is {len(request.data)} B, and we expect {expected_bytes} B") + if len(request.data) != expected_bytes: + safe_fail_request(400, + "Uploaded data did not match the expected size. Check request headers are correct") + try: + receipt_token = upload_clk_data_binary(project_id, dp_id, stream, count, size) + except ValueError: + safe_fail_request(400, + "Uploaded data did not match the expected size. Check request headers are correct.") + else: + safe_fail_request(400, "Content Type not supported") + except Exception: + log.info("The dataprovider was not able to upload her clks," + " re-enable the corresponding upload token to be used.") + with DBConn() as conn: + db.set_dataprovider_upload_state(conn, dp_id, state='error') + raise + with DBConn() as conn: + db.set_dataprovider_upload_state(conn, dp_id, state='done') + return {'message': 'Updated', 'receipt_token': receipt_token}, 201 + def project_clks_post(project_id): """ Update a project to provide encoded PII data. 
diff --git a/backend/requirements.txt b/backend/requirements.txt index 54a217c1..1b2c9fb2 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -3,7 +3,7 @@ bitmath==1.3.1.2 celery==4.4.0 clkhash==0.15.1 colorama==0.4.1 # required for structlog -connexion[swagger-ui]==2.6 +connexion==2.6.0 Flask-Opentracing==0.2.0 Flask==1.1.1 flower==0.9.2 diff --git a/benchmarking/benchmark.py b/benchmarking/benchmark.py index 4d214658..e0d16533 100644 --- a/benchmarking/benchmark.py +++ b/benchmarking/benchmark.py @@ -134,7 +134,7 @@ def upload_data(participant, auth_token, clk_length): assert len(facs_data) % SIZE_PER_CLK == 0 try: r = requests.post( - server + '/api/v1/projects/{}/clks'.format(credentials['project_id']), + server + '/api/v1/projects/{}/binaryclks'.format(credentials['project_id']), headers={ 'Authorization': auth_token, 'Content-Type': 'application/octet-stream', diff --git a/docs/_static/openapi.yaml b/docs/_static/openapi.yaml new file mode 120000 index 00000000..48ac7418 --- /dev/null +++ b/docs/_static/openapi.yaml @@ -0,0 +1 @@ +../../backend/entityservice/api_def/openapi.yaml \ No newline at end of file diff --git a/docs/_static/swagger.yaml b/docs/_static/swagger.yaml deleted file mode 120000 index 7287381c..00000000 --- a/docs/_static/swagger.yaml +++ /dev/null @@ -1 +0,0 @@ -../../backend/entityservice/api_def/swagger.yaml \ No newline at end of file diff --git a/docs/conf.py b/docs/conf.py index f7372845..d718d1bd 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -184,7 +184,7 @@ { 'name': 'Entity Service API', 'page': 'api', - 'spec': '_static/swagger.yaml', + 'spec': '_static/openapi.yaml', 'embed': True } ] diff --git a/docs/production-deployment.rst b/docs/production-deployment.rst index 26cd48b0..b13a4c81 100644 --- a/docs/production-deployment.rst +++ b/docs/production-deployment.rst @@ -133,8 +133,8 @@ It should return some information if some values are missing, e.g.:: Notes: - the `lint` command does not exit with a non 0 
exit code, and our templates are currently failing if linting with the option `--strict`. - if the folder `Charts` is not deleted, the linting may throw some errors from the dependent charts if a - value is missing without clear description, e.g. if the redis password is missing, the following error is returned from the `redis-ha` template - because the method `b64enc` requires a non empty string, but the template does not check first if the value is empty:: + value is missing without clear description, e.g. if the redis password is missing, the following error is returned from the `redis-ha` template + because the method `b64enc` requires a non empty string, but the template does not check first if the value is empty:: ==> Linting . [ERROR] templates/: render error in "entity-service/charts/redis-ha/templates/redis-auth-secret.yaml": template: entity-service/charts/redis-ha/templates/redis-auth-secret.yaml:10:35: executing "entity-service/charts/redis-ha/templates/redis-auth-secret.yaml" at : invalid value; expected string diff --git a/frontend/Dockerfile b/frontend/Dockerfile index eb888acf..7622c721 100644 --- a/frontend/Dockerfile +++ b/frontend/Dockerfile @@ -3,7 +3,7 @@ FROM data61/anonlink-docs-builder:20190625 AS docsbuilder COPY ./docs /src/docs -COPY ./backend/entityservice/api_def/swagger.yaml /src/docs/_static/swagger.yaml +COPY ./backend/entityservice/api_def/openapi.yaml /src/docs/_static/openapi.yaml WORKDIR /src RUN python -m sphinx /src/docs /build