2 changes: 1 addition & 1 deletion README.md
@@ -4,7 +4,7 @@

A service for performing privacy preserving record linkage. Allows organizations to carry out record linkage without disclosing personally identifiable information.

Clients should use [clkhash](https://github.com/data61/clkhash/) or the [encoding-service](https://github.com/data61/anonlink-encoding-service/).
Clients should use [anonlink-client](https://github.com/data61/anonlink-client/) or the [encoding-service](https://github.com/data61/anonlink-encoding-service/).

## Documentation

3 changes: 0 additions & 3 deletions backend/entityservice/settings.py
@@ -12,9 +12,6 @@ class Config(object):
"""

# If adding or deleting any, please ensure that the changelog will mention them.

DEBUG = os.getenv("DEBUG", "false") == "true"
Contributor: Why is this not needed anymore?

Collaborator (author): We removed the debug flag a long time ago. Not sure why this hadn't been cleaned up.


CONNEXION_STRICT_VALIDATION = os.getenv("CONNEXION_STRICT_VALIDATION", "true").lower() == "true"
CONNEXION_RESPONSE_VALIDATION = os.getenv("CONNEXION_RESPONSE_VALIDATION", "true").lower() == "true"

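For context, the settings that remain are parsed from environment variables using a simple boolean pattern. A minimal illustrative sketch (not part of this diff) of how that parsing behaves:

```python
# Illustrative sketch of the env-var boolean pattern used by the remaining settings.
import os

os.environ["CONNEXION_STRICT_VALIDATION"] = "True"
strict = os.getenv("CONNEXION_STRICT_VALIDATION", "true").lower() == "true"
print(strict)  # True -- the comparison is case-insensitive on the value
```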
3 changes: 2 additions & 1 deletion base/requirements.txt
@@ -1,7 +1,8 @@
anonlink==0.12.5
anonlink-client==0.1.2
bitmath==1.3.1.2
celery==4.4.2
clkhash==0.15.1
clkhash==0.16.0a1
colorama==0.4.3 # required for structlog
connexion==2.6.0
Flask-Opentracing==1.1.0
1 change: 1 addition & 0 deletions docs/tutorial/Dockerfile
@@ -13,3 +13,4 @@ ADD tutorial-requirements.txt /src/requirements.txt
WORKDIR /src
RUN pip install -U -r requirements.txt
ADD . /src
ENTRYPOINT /bin/sh
145 changes: 75 additions & 70 deletions docs/tutorial/Permutations.ipynb

Large diffs are not rendered by default.

81 changes: 45 additions & 36 deletions docs/tutorial/Record Linkage API.ipynb
@@ -8,12 +8,12 @@
"source": [
"# Anonlink Entity Service API\n",
"\n",
"This tutorial demonstrates interacting with the entity service via the REST API. The primary alternative is to use\n",
"a library or command line tool such as [`clkhash`](http://clkhash.readthedocs.io/) which can handle the communication with the anonlink entity service.\n",
"This tutorial demonstrates directly interacting with the entity service via the REST API. The primary alternative is to use\n",
"a library or command line tool such as [`anonlink-client`](https://anonlink-client.readthedocs.io/) which can handle the communication with the anonlink entity service.\n",
"\n",
"### Dependencies\n",
"\n",
"In this tutorial we interact with the REST API using the `requests` Python library. Additionally we use the `clkhash` Python library in this tutorial to define the linkage schema and to encode the PII. The synthetic dataset comes from the `recordlinkage` package. All the dependencies can be installed with pip:\n",
"In this tutorial we interact with the REST API using the `requests` Python library. Additionally we use the `clkhash` Python library to define the linkage schema and to encode the PII. The synthetic dataset comes from the `recordlinkage` package. All the dependencies can be installed with pip:\n",
"\n",
"```\n",
"pip install requests clkhash recordlinkage\n",
@@ -94,7 +94,7 @@
{
"data": {
"text/plain": [
"{'project_count': 6535, 'rate': 2504556, 'status': 'ok'}"
"{'project_count': 834, 'rate': 504848, 'status': 'ok'}"
]
},
"execution_count": 3,
@@ -261,8 +261,8 @@
"name": "stderr",
"output_type": "stream",
"text": [
"generating CLKs: 100%|██████████| 5.00k/5.00k [00:00<00:00, 10.7kclk/s, mean=643, std=45.7]\n",
"generating CLKs: 100%|██████████| 5.00k/5.00k [00:00<00:00, 13.5kclk/s, mean=631, std=52.9]\n"
"generating CLKs: 100%|██████████| 5.00k/5.00k [00:00<00:00, 9.82kclk/s, mean=643, std=45.7]\n",
"generating CLKs: 100%|██████████| 5.00k/5.00k [00:00<00:00, 11.3kclk/s, mean=631, std=52.9]\n"
]
}
],
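The progress bars above come from encoding the two synthetic datasets with clkhash. The encoding cells themselves are collapsed in this diff; a rough sketch, assuming the clkhash 0.16 API pinned in this PR (`generate_clk_from_csv` taking a shared secret) and illustrative file names:

```python
# Rough sketch of CLK generation with clkhash; file names and secret are illustrative.
from clkhash import clk
from clkhash.schema import from_json_file

with open('linkage-schema.json') as f:   # hypothetical schema file
    schema = from_json_file(f)

with open('alice.csv') as f:             # hypothetical PII extract for party A
    clks_a = clk.generate_clk_from_csv(f, 'shared secret', schema)
```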
@@ -298,10 +298,10 @@
{
"data": {
"text/plain": [
"{'project_id': '4dc4e94983cb003e6eaf1dd9e09ece6c4f6b142076bf2ca3',\n",
" 'result_token': '0924d34d7fa1a0dc9b3d04b247698d8c070d0896cdb1c7d8',\n",
" 'update_tokens': ['317b702338d3c9cd8f1d4cdf92bbc67e3a0f9ff019886be5',\n",
" 'df27bf6dc03b2d166cd33f4ac806fef096c6d4eb5103c105']}"
"{'project_id': '8f6bbd8be51b5fc2de9e0094a6f93042ba210fcbc2204c94',\n",
" 'result_token': 'fd60ffe37abef769f7827cfe05dc47ec21d22c39568cc2fc',\n",
" 'update_tokens': ['6ff34bc02872bf0e109906f50316cb02ed77ca2995a56d10',\n",
" '3ee4061d172a102091c0d377159fd5d706fd5ee6412a798f']}"
]
},
"execution_count": 9,
@@ -351,9 +351,10 @@
" 'notes': '',\n",
" 'number_parties': 2,\n",
" 'parties_contributed': 0,\n",
" 'project_id': '4dc4e94983cb003e6eaf1dd9e09ece6c4f6b142076bf2ca3',\n",
" 'project_id': '8f6bbd8be51b5fc2de9e0094a6f93042ba210fcbc2204c94',\n",
" 'result_type': 'groups',\n",
" 'schema': {}}"
" 'schema': {},\n",
" 'uses_blocking': False}"
]
},
"execution_count": 10,
@@ -435,7 +436,7 @@
"source": [
"## Create a run\n",
"\n",
"Now the project has been created and the CLK data has been uploaded we can carry out some privacy preserving record linkage. Try with a few different threshold values:"
"Now the project has been created and the CLK encodings have been uploaded we can carry out some privacy preserving record linkage. The same encoding data can be linked using different threshold values by creating **runs**."
]
},
{
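A sketch of creating such a run, assuming the `POST /projects/{project_id}/runs` endpoint; `url`, `project_id` and `credentials` come from earlier notebook cells and the threshold value is illustrative:

```python
# Sketch of run creation; identifiers come from earlier notebook cells.
import requests

run = requests.post(
    '{}/projects/{}/runs'.format(url, project_id),
    headers={'Authorization': credentials['result_token']},
    json={'threshold': 0.85}).json()
run_id = run['run_id']
```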
@@ -490,12 +491,15 @@
{
"data": {
"text/plain": [
"{'current_stage': {'description': 'compute output', 'number': 3},\n",
"{'current_stage': {'description': 'waiting for CLKs',\n",
" 'number': 1,\n",
" 'progress': {'absolute': 2,\n",
" 'description': 'number of parties already contributed',\n",
" 'relative': 1.0}},\n",
" 'stages': 3,\n",
" 'state': 'completed',\n",
" 'time_added': '2019-11-18T22:23:20.085746+00:00',\n",
" 'time_completed': '2019-11-18T22:23:20.995602+00:00',\n",
" 'time_started': '2019-11-18T22:23:20.226739+00:00'}"
" 'state': 'created',\n",
" 'time_added': '2020-04-02T23:08:03.984457+00:00',\n",
" 'time_started': None}"
]
},
"execution_count": 15,
@@ -518,7 +522,7 @@
"source": [
"## Results\n",
"\n",
"Now after some delay (depending on the size) we can fetch the results. This can of course be done by directly polling the REST API using `requests`, however for simplicity we will just use the watch_run_status function provided in `clkhash.rest_client`.\n",
"Now after some delay (depending on the size) we can fetch the results. This can of course be done by directly polling the REST API using `requests`, however for simplicity we will just use the watch_run_status function provided in `anonlinkclient.rest_client`.\n",
"\n",
"> Note the `server` is provided rather than `url`."
]
@@ -542,9 +546,9 @@
}
],
"source": [
"from clkhash.rest_client import RestClient\n",
"from clkhash.rest_client import format_run_status\n",
"from anonlinkclient.rest_client import RestClient, format_run_status\n",
"rest_client = RestClient(server)\n",
"\n",
"for update in rest_client.watch_run_status(project_id, run_id, credentials['result_token'], timeout=300):\n",
" clear_output(wait=True)\n",
" print(format_run_status(update))"
@@ -588,16 +592,16 @@
"name": "stdout",
"output_type": "stream",
"text": [
"a[843] maps to b[4886]\n",
"a[1609] maps to b[3419]\n",
"a[35] maps to b[3965]\n",
"a[524] maps to b[4735]\n",
"a[1242] maps to b[1106]\n",
"a[2398] maps to b[4832]\n",
"a[1125] maps to b[1371]\n",
"a[1218] maps to b[725]\n",
"a[1611] maps to b[1985]\n",
"a[1467] maps to b[4683]\n",
"a[870] maps to b[1723]\n",
"a[174] maps to b[4485]\n",
"a[420] maps to b[4323]\n",
"a[136] maps to b[1416]\n",
"a[3090] maps to b[4797]\n",
"a[2940] maps to b[3663]\n",
"a[2228] maps to b[1095]\n",
"a[292] maps to b[4409]\n",
"a[1340] maps to b[3978]\n",
"a[218] maps to b[517]\n",
"...\n"
]
}
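The mapping lines above are derived from the run's `groups` result. A sketch of producing them, assuming the result endpoint returns `{'groups': [[[dataset_index, record_index], ...], ...]}` and identifiers from earlier cells:

```python
# Sketch of turning a 'groups' result into "a[i] maps to b[j]" lines.
import itertools
import requests

result = requests.get(
    '{}/projects/{}/runs/{}/result'.format(url, project_id, run_id),
    headers={'Authorization': credentials['result_token']}).json()

for group in itertools.islice(result['groups'], 10):
    # with two parties, each group pairs one record from dataset 0 with one from dataset 1
    (d0, i0), (d1, i1) = sorted(group)
    print('a[{}] maps to b[{}]'.format(i0, i1))
print('...')
```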
@@ -672,10 +676,15 @@
}
],
"source": [
"requests.delete(\n",
" \"{}/projects/{}\".format(url, project_id), \n",
" headers={\"Authorization\": credentials['result_token']})"
"requests.delete(\"{}/projects/{}\".format(url, project_id), headers={\"Authorization\": credentials['result_token']})"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
@@ -694,7 +703,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
"version": "3.8.0"
},
"pycharm": {
"stem_cell": {
@@ -707,5 +716,5 @@
}
},
"nbformat": 4,
"nbformat_minor": 2
"nbformat_minor": 4
}