From 22245bf20781c2c37a1f5cc104f34eaf96dc8a73 Mon Sep 17 00:00:00 2001 From: Guillaume Smith Date: Mon, 18 Nov 2019 11:24:10 +1100 Subject: [PATCH 1/5] Update clkhash dependency to 0.15.0 --- backend/requirements.txt | 2 +- docs/tutorial/tutorial-requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/requirements.txt b/backend/requirements.txt index d0d51014..42e464eb 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -1,7 +1,7 @@ anonlink==0.12.5 bitmath==1.3.1.2 celery==4.3.0 -clkhash==0.14.0 +clkhash==0.15.0 colorama==0.4.1 # required for structlog connexion==1.4 Flask-Opentracing==0.2.0 diff --git a/docs/tutorial/tutorial-requirements.txt b/docs/tutorial/tutorial-requirements.txt index cb0725bb..dfcba3ac 100644 --- a/docs/tutorial/tutorial-requirements.txt +++ b/docs/tutorial/tutorial-requirements.txt @@ -1,4 +1,4 @@ -clkhash>=0.13.0 +clkhash>=0.15.0 ipython matplotlib recordlinkage From a6cc34488c0649e7117357b2ea3ec93d1aa88786 Mon Sep 17 00:00:00 2001 From: Guillaume Smith Date: Mon, 18 Nov 2019 11:24:46 +1100 Subject: [PATCH 2/5] Update tutorials to work with clkhash 0.15.0 clkhash 0.15.0 uses only one secret to hash. --- docs/tutorial/Permutations.ipynb | 147 +++---- docs/tutorial/Record Linkage API.ipynb | 197 ++++++---- docs/tutorial/Similarity Scores.ipynb | 126 +++--- .../multiparty-linkage-with-clkhash.ipynb | 362 +++++++++--------- 4 files changed, 421 insertions(+), 411 deletions(-) diff --git a/docs/tutorial/Permutations.ipynb b/docs/tutorial/Permutations.ipynb index 7e3690d9..de1ff6bb 100644 --- a/docs/tutorial/Permutations.ipynb +++ b/docs/tutorial/Permutations.ipynb @@ -82,7 +82,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{\"project_count\": 1021, \"rate\": 2453247, \"status\": \"ok\"}\n" + "{\"project_count\": 5938, \"rate\": 2118828, \"status\": \"ok\"}\r\n" ] } ], @@ -294,7 +294,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Overwriting /tmp/tmptfalxkiq\n" + "Overwriting /tmp/tmp2d7l4ief\n" ] } ], @@ -399,17 +399,17 @@ "name": "stdout", "output_type": "stream", "text": [ - "Credentials will be saved in /tmp/tmpyr8dc2pf\n", + "Credentials will be saved in /tmp/tmp5kxg8nky\n", "\u001b[31mProject created\u001b[0m\n" ] }, { "data": { "text/plain": [ - "{'project_id': 'b8211d1450c8d0d631dbdc1fb482af106b8cbdebed5b7fd3',\n", - " 'result_token': '8fe1fc01f7ac3a3406d1e031b7d120800aa6460d0da62abb',\n", - " 'update_tokens': ['1c39c6972626bd34729812f0b9cf6e467461824dbbd0682c',\n", - " '901c12061cf621b67df5b9de2719b8806636364d3fdc1765']}" + "{'project_id': '61ad07b11de00b335e4efdb440d0da061b9e89f0b7d25006',\n", + " 'result_token': 'f75ec8d9142b2ac43c1f43899b663113a260e97a3fc23bca',\n", + " 'update_tokens': ['25df4927eccfb37d11b1ab72dad0abc9b5d9abae9e73f0fc',\n", + " '3aafac5ff5c5024a9f770fb2dc8d87a8ab73a504c0a2a03c']}" ] }, "execution_count": 7, @@ -446,7 +446,7 @@ "At the moment both data providers have *raw* personally identiy information. We first have to generate CLKs from the raw entity information. We need:\n", "- the *clkhash* library\n", "- the linkage schema from above\n", - "- and two secret passwords which are only known to Alice and Bob. (here: `horse` and `staple`)\n", + "- and a secret which is only known to Alice and Bob. (here: `my_secret`)\n", "\n", "Please see [clkhash](https://clkhash.readthedocs.io/) documentation for further details on this." ] @@ -464,16 +464,14 @@ "name": "stdout", "output_type": "stream", "text": [ - "generating CLKs: 100%|█| 5.00k/5.00k [00:01<00:00, 1.32kclk/s, mean=765, std=37.1]\n", - "\u001b[31mCLK data written to /tmp/tmpc_4k553j.json\u001b[0m\n", - "generating CLKs: 100%|█| 5.00k/5.00k [00:01<00:00, 4.28kclk/s, mean=756, std=43.3]\n", - "\u001b[31mCLK data written to /tmp/tmpv7eo2tfp.json\u001b[0m\n" + "\u001b[31mCLK data written to /tmp/tmpdumz7c8u.json\u001b[0m\n", + "\u001b[31mCLK data written to /tmp/tmpq70k1o_9.json\u001b[0m\n" ] } ], "source": [ - "!clkutil hash \"{a_csv.name}\" horse staple \"{schema.name}\" \"{a_clks.name}\"\n", - "!clkutil hash \"{b_csv.name}\" horse staple \"{schema.name}\" \"{b_clks.name}\"" + "!clkutil hash \"{a_csv.name}\" my_secret \"{schema.name}\" \"{a_clks.name}\"\n", + "!clkutil hash \"{b_csv.name}\" my_secret \"{schema.name}\" \"{b_clks.name}\"" ] }, { @@ -498,22 +496,33 @@ "name": "stdout", "output_type": "stream", "text": [ - "Usage: clkutil upload [OPTIONS] CLK_JSON\n", - "\n", - " Upload CLK data to entity matching server.\n", - "\n", - " Given a json file containing hashed clk data as CLK_JSON, upload to the\n", - " entity resolution service.\n", - "\n", - " Use \"-\" to read from stdin.\n", - "\n", - "Options:\n", - " --project TEXT Project identifier\n", - " --apikey TEXT Authentication API key for the server.\n", - " --server TEXT Server address including protocol\n", - " -o, --output FILENAME\n", - " -v, --verbose Script is more talkative\n", - " --help Show this message and exit.\n" + "Usage: clkutil upload [OPTIONS] CLK_JSON\r\n", + "\r\n", + " Upload CLK data to entity matching server.\r\n", + "\r\n", + " Given a json file containing hashed clk data as CLK_JSON, upload to the\r\n", + " entity resolution service.\r\n", + "\r\n", + " Use \"-\" to read from stdin.\r\n", + "\r\n", + "Options:\r\n", + " --project TEXT Project identifier\r\n", + " --apikey TEXT Authentication API key for the server.\r\n", + " -o, --output FILENAME\r\n", + " --server TEXT Server address including protocol. Default\r\n", + " https://testing.es.data61.xyz.\r\n", + " --retry-multiplier INTEGER If receives a 503 from\r\n", + " server, minimum waiting time before\r\n", + " retrying. Default 100.\r\n", + " --retry-exponential-max INTEGER\r\n", + " If receives a 503 from\r\n", + " server, maximum time interval between\r\n", + " retries. Default 10000.\r\n", + " --retry-max-time INTEGER If receives a 503 from\r\n", + " server, retry only within this period.\r\n", + " Default 20000.\r\n", + " -v, --verbose Script is more talkative\r\n", + " --help Show this message and exit.\r\n" ] } ], @@ -653,7 +662,8 @@ "outputs": [], "source": [ "import requests\n", - "import clkhash.rest_client\n", + "from clkhash.rest_client import RestClient\n", + "from clkhash.rest_client import format_run_status\n", "\n", "from IPython.display import clear_output" ] @@ -677,9 +687,10 @@ } ], "source": [ - "for update in clkhash.rest_client.watch_run_status(url, project_id, run_id, credentials['result_token'], timeout=300):\n", + "rest_client = RestClient(url)\n", + "for update in rest_client.watch_run_status(project_id, run_id, credentials['result_token'], timeout=300):\n", " clear_output(wait=True)\n", - " print(clkhash.rest_client.format_run_status(update))" + " print(format_run_status(update))" ] }, { @@ -759,7 +770,7 @@ { "data": { "text/plain": [ - "4858" + "4851" ] }, "execution_count": 18, @@ -815,7 +826,7 @@ { "data": { "text/plain": [ - "[2333, 1468, 559, 274, 653, 3385, 278, 3568, 3617, 4356]" + "[1704, 3246, 227, 2913, 1848, 2942, 1358, 3469, 2025, 1349]" ] }, "execution_count": 20, @@ -849,7 +860,7 @@ { "data": { "text/plain": [ - "[2083, 1106, 3154, 1180, 2582, 375, 3533, 1046, 316, 2427]" + "[3095, 4173, 2439, 29, 3016, 493, 764, 746, 1981, 2840]" ] }, "execution_count": 21, @@ -923,16 +934,16 @@ { "data": { "text/plain": [ - "['rec-2689-org,ainsley,robison,23,atherton street,villa 1/4,deer park,3418,nsw,19310531,4102867\\n',\n", - " 'rec-1056-org,chloe,imgraben,47,curlewis crescent,dragon rising,burleigh waters,2680,qld,19520516,6111417\\n',\n", - " 'rec-1820-org,liam,cullens,121,chandler street,the burrows,safety bay,3073,qld,19910811,7828812\\n',\n", - " 'rec-2192-org,ellie,fearnall,31,fishburn street,colbara,cherrybrook,5171,wa,,7745948\\n',\n", - " 'rec-2696-org,campbell,nguyen,6,diselma place,villa 2,collinswood,4343,nsw,19630325,2861961\\n',\n", - " 'rec-968-org,aidan,blake,15,namatjira drive,cooramin,dromana,4074,vic,19270928,4317464\\n',\n", - " 'rec-3833-org,nicholas,clarke,13,gaylard place,tryphinia view,wetherill park,2810,nsw,19041223,3927795\\n',\n", - " 'rec-4635-org,isabella,white,8,cooling place,,rosebud,6151,sa,19990911,2206317\\n',\n", - " 'rec-3549-org,harry,thorpe,11,kambalda crescent,louisa tor 4,angaston,2777,qld,19421128,2701790\\n',\n", - " 'rec-1220-org,lauren,weltman,6,tewksbury circuit,heritage estate,evans head,6330,nsw,19840930,9462453\\n']" + "['rec-126-org,nikki,jaric,13,renmark street,parish lorne,chester hill,2106,nsw,19520401,9272010\\n',\n", + " 'rec-3413-org,amber,durnin,41,hindmarsh drive,walmount,knoxfield,3634,vic,19251010,8760281\\n',\n", + " 'rec-2774-org,dylan,herbert,150,andrews street,brentwood vlge,sheidow park,5067,vic,19430105,4521571\\n',\n", + " 'rec-249-org,lachlan,shepherd,1,grainger circuit,willandra,adaminaby,7250,nsw,19080122,3139543\\n',\n", + " 'rec-4870-org,carlin,garcia,46,von guerard crescent,,clarendon,2140,nsw,19241024,1512461\\n',\n", + " 'rec-1327-org,emma,copperstone,5,decima circuit,crower,avoca north,2166,nsw,19451208,1538637\\n',\n", + " 'rec-959-org,zac,hand,18,kambalda crescent,mckinnon glen,east maitland,4500,sa,19970725,2709860\\n',\n", + " 'rec-2971-org,jack,highet,26,mathieson crescent,,whittington,3064,vic,19650507,6260000\\n',\n", + " \"rec-4637-org,kydan,gigney,64,o'rourke street,timdoolin,shellharbour,4059,nsw,19760409,3279351\\n\",\n", + " 'rec-2003-org,mitchell,webb,15,denovan circuit,villa 3,connewarre,5091,nsw,19801006,8786441\\n']" ] }, "execution_count": 24, @@ -956,16 +967,16 @@ { "data": { "text/plain": [ - "['rec-2689-dup-0,ainsley,labalck,23,atherto n street,villa 1/4,deer park,3418,nsw,19310531,4102867\\n',\n", - " 'rec-1056-dup-0,james,imgrapen,47,curlewiscrescent,dragon rising,burleigh waters,2680,qld,19520516,6111417\\n',\n", - " 'rec-1820-dup-0,liam,cullens,121,chandlerw street,the burrows,safety bay,3073,qld,19910811,7828812\\n',\n", - " 'rec-2192-dup-0,elpie,fearnull,31,fishbunestreet,,cherrybrook,5171,wa,,7745948\\n',\n", - " 'rec-2696-dup-0,jenna,nguyen,85,diselmaplace,villz2,collinswood,4343,nsw,19630325,2861961\\n',\n", - " 'rec-968-dup-0,aidan,blake,15,namatjifra drive,cooramin,dromana,4074,vic,19270928,4317464\\n',\n", - " 'rec-3833-dup-0,nicholas,clarke,,gaylard place,tryphinia view,wetherill park,2810,nsw,19041223,3972795\\n',\n", - " 'rec-4635-dup-0,isaeblla,white,8,cooling place,massey green,rosebud,6151,sa,19990911,2206317\\n',\n", - " 'rec-3549-dup-0,taylor,thorpe,11,kambalda c rescent,louisa tor 4,angasgon,2777,qld,19421128,2701790\\n',\n", - " 'rec-1220-dup-0,lauren,welman,6,tewksburl circuit,heritage estate,evans head,6330,nsw,19840930,9462453\\n']" + "['rec-126-dup-0,nikki,jaruic,13,renmark street,parishlorne,chester hill,2106,nsw,19520401,9272910\\n',\n", + " 'rec-3413-dup-0,amber,durnin,41,hindmarsh drive,walmoutn,knoxfield,3643,vic,19521010,8760281\\n',\n", + " 'rec-2774-dup-0,dylan,herbert,150,andrews street,brentwoo dvlge,sheido wpark,5067,vic,19430105,4521571\\n',\n", + " 'rec-249-dup-0,lachlan,shephaerd,1,grainger circuit,willandra,adaminaby,7250,nsw,19080122,3139543\\n',\n", + " 'rec-4870-dup-0,carlin,munforti,46,von guerard crescent,,clarendon,2140,nsw,19241024,1512461\\n',\n", + " 'rec-1327-dup-0,emma,copperstone,5,decima circuit,crwer,avoca north,2166,nsw,19451208,1536837\\n',\n", + " 'rec-959-dup-0,zac,,18,kambalda crescent,mckinnon glen,east mailand,4500,sa,19970725,2709860\\n',\n", + " 'rec-2971-dup-0,jack,highet,26,mathieson crescent,,kaleen,3064,vic,19650507,6260000\\n',\n", + " \"rec-4637-dup-0,aurora,gigney,64,o'rourke street,timdoolin,shellharbour,4059,nsw,19760409,7169291\\n\",\n", + " 'rec-2003-dup-0,mitchell,,15,denovan circuit,villa 3,,5091,nsw,19801006,8786441\\n']" ] }, "execution_count": 25, @@ -1003,16 +1014,16 @@ "name": "stdout", "output_type": "stream", "text": [ - "Ainsley Robison (rec-2689-org) =? Ainsley Labalck (rec-2689-dup-0)\n", - "Chloe Imgraben (rec-1056-org) =? James Imgrapen (rec-1056-dup-0)\n", - "Liam Cullens (rec-1820-org) =? Liam Cullens (rec-1820-dup-0)\n", - "Ellie Fearnall (rec-2192-org) =? Elpie Fearnull (rec-2192-dup-0)\n", - "Campbell Nguyen (rec-2696-org) =? Jenna Nguyen (rec-2696-dup-0)\n", - "Aidan Blake (rec-968-org) =? Aidan Blake (rec-968-dup-0)\n", - "Nicholas Clarke (rec-3833-org) =? Nicholas Clarke (rec-3833-dup-0)\n", - "Isabella White (rec-4635-org) =? Isaeblla White (rec-4635-dup-0)\n", - "Harry Thorpe (rec-3549-org) =? Taylor Thorpe (rec-3549-dup-0)\n", - "Lauren Weltman (rec-1220-org) =? Lauren Welman (rec-1220-dup-0)\n" + "Nikki Jaric (rec-126-org) =? Nikki Jaruic (rec-126-dup-0)\n", + "Amber Durnin (rec-3413-org) =? Amber Durnin (rec-3413-dup-0)\n", + "Dylan Herbert (rec-2774-org) =? Dylan Herbert (rec-2774-dup-0)\n", + "Lachlan Shepherd (rec-249-org) =? Lachlan Shephaerd (rec-249-dup-0)\n", + "Carlin Garcia (rec-4870-org) =? Carlin Munforti (rec-4870-dup-0)\n", + "Emma Copperstone (rec-1327-org) =? Emma Copperstone (rec-1327-dup-0)\n", + "Zac Hand (rec-959-org) =? Zac (rec-959-dup-0)\n", + "Jack Highet (rec-2971-org) =? Jack Highet (rec-2971-dup-0)\n", + "Kydan Gigney (rec-4637-org) =? Aurora Gigney (rec-4637-dup-0)\n", + "Mitchell Webb (rec-2003-org) =? Mitchell (rec-2003-dup-0)\n" ] } ], @@ -1054,9 +1065,9 @@ "name": "stdout", "output_type": "stream", "text": [ - "Found 4858 correct matches out of 5000. Incorrectly linked 0 matches.\n", + "Found 4851 correct matches out of 5000. Incorrectly linked 0 matches.\n", "Precision: 100.0%\n", - "Recall: 97.2%\n" + "Recall: 97.0%\n" ] } ], diff --git a/docs/tutorial/Record Linkage API.ipynb b/docs/tutorial/Record Linkage API.ipynb index c957c7b1..d3a7a143 100644 --- a/docs/tutorial/Record Linkage API.ipynb +++ b/docs/tutorial/Record Linkage API.ipynb @@ -70,10 +70,10 @@ "outputs": [ { "name": "stdout", + "output_type": "stream", "text": [ "Testing anonlink-entity-service hosted at https://testing.es.data61.xyz/api/v1/\n" - ], - "output_type": "stream" + ] } ], "source": [ @@ -93,11 +93,13 @@ "outputs": [ { "data": { - "text/plain": "{'project_count': 7871, 'rate': 301990, 'status': 'ok'}" + "text/plain": [ + "{'project_count': 5940, 'rate': 2162326, 'status': 'ok'}" + ] }, + "execution_count": 3, "metadata": {}, - "output_type": "execute_result", - "execution_count": 3 + "output_type": "execute_result" } ], "source": [ @@ -187,32 +189,50 @@ "outputs": [], "source": [ "import clkhash\n", + "from clkhash.comparators import *\n", "from clkhash.field_formats import *\n", "schema = clkhash.randomnames.NameList.SCHEMA\n", "_missing = MissingValueSpec(sentinel='')\n", "schema.fields = [\n", " Ignore('rec_id'),\n", - " StringSpec('given_name', \n", - " FieldHashingProperties(ngram=2, k=15)),\n", - " StringSpec('surname', \n", - " FieldHashingProperties(ngram=2, k=15)),\n", - " IntegerSpec('street_number', \n", - " FieldHashingProperties(ngram=1, \n", - " positional=True, \n", - " k=15, \n", - " missing_value=_missing)),\n", - " StringSpec('address_1', \n", - " FieldHashingProperties(ngram=2, k=15)),\n", - " StringSpec('address_2', \n", - " FieldHashingProperties(ngram=2, k=15)),\n", - " StringSpec('suburb', \n", - " FieldHashingProperties(ngram=2, k=15)),\n", - " IntegerSpec('postcode', \n", - " FieldHashingProperties(ngram=1, positional=True, k=15)),\n", - " StringSpec('state', \n", - " FieldHashingProperties(ngram=2, k=15)),\n", - " IntegerSpec('date_of_birth', \n", - " FieldHashingProperties(ngram=1, positional=True, k=15, missing_value=_missing)),\n", + " StringSpec('given_name',\n", + " FieldHashingProperties(\n", + " NgramComparison(2),\n", + " BitsPerTokenStrategy(15))),\n", + " StringSpec('surname',\n", + " FieldHashingProperties(\n", + " NgramComparison(2),\n", + " BitsPerTokenStrategy(15))),\n", + " IntegerSpec('street_number',\n", + " FieldHashingProperties(\n", + " NgramComparison(1, positional=True),\n", + " BitsPerTokenStrategy(15),\n", + " missing_value=_missing)),\n", + " StringSpec('address_1',\n", + " FieldHashingProperties(\n", + " NgramComparison(2),\n", + " BitsPerTokenStrategy(15))),\n", + " StringSpec('address_2',\n", + " FieldHashingProperties(\n", + " NgramComparison(2),\n", + " BitsPerTokenStrategy(15))),\n", + " StringSpec('suburb',\n", + " FieldHashingProperties(\n", + " NgramComparison(2),\n", + " BitsPerTokenStrategy(15))),\n", + " IntegerSpec('postcode',\n", + " FieldHashingProperties(\n", + " NgramComparison(1, positional=True),\n", + " BitsPerTokenStrategy(15))),\n", + " StringSpec('state',\n", + " FieldHashingProperties(\n", + " NgramComparison(2),\n", + " BitsPerTokenStrategy(15))),\n", + " IntegerSpec('date_of_birth',\n", + " FieldHashingProperties(\n", + " NgramComparison(1, positional=True),\n", + " BitsPerTokenStrategy(15),\n", + " missing_value=_missing)),\n", " Ignore('soc_sec_id')\n", "]" ] @@ -239,27 +259,20 @@ "outputs": [ { "name": "stderr", + "output_type": "stream", "text": [ - "\rgenerating CLKs: 0%| | 0.00/5.00k [00:00" + "text/plain": [ + "" + ] }, + "execution_count": 20, "metadata": {}, - "output_type": "execute_result", - "execution_count": 20 + "output_type": "execute_result" } ], "source": [ @@ -626,13 +676,6 @@ " \"{}/projects/{}\".format(url, project_id), \n", " headers={\"Authorization\": credentials['result_token']})" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { @@ -651,18 +694,18 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.0" + "version": "3.7.3" }, "pycharm": { "stem_cell": { "cell_type": "raw", - "source": [], "metadata": { "collapsed": false - } + }, + "source": [] } } }, "nbformat": 4, "nbformat_minor": 2 -} \ No newline at end of file +} diff --git a/docs/tutorial/Similarity Scores.ipynb b/docs/tutorial/Similarity Scores.ipynb index 63174a6d..6a2eec06 100644 --- a/docs/tutorial/Similarity Scores.ipynb +++ b/docs/tutorial/Similarity Scores.ipynb @@ -100,7 +100,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{\"project_count\": 2115, \"rate\": 7737583, \"status\": \"ok\"}\r\n" + "{\"project_count\": 5942, \"rate\": 2177090, \"status\": \"ok\"}\r\n" ] } ], @@ -312,7 +312,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Overwriting /tmp/tmpvlivqdcf\n" + "Overwriting /tmp/tmp4gg3l5ai\n" ] } ], @@ -416,17 +416,17 @@ "name": "stdout", "output_type": "stream", "text": [ - "Credentials will be saved in /tmp/tmpcwpvq6kj\n", + "Credentials will be saved in /tmp/tmpricgd0mg\n", "\u001b[31mProject created\u001b[0m\n" ] }, { "data": { "text/plain": [ - "{'project_id': '1eb3da44f73440c496ab42217381181de55e9dcd6743580c',\n", - " 'result_token': '846c6c25097c7794131de0d3e2c39c04b7de9688acedc383',\n", - " 'update_tokens': ['52aae3f1dfa8a4ec1486d8f7d63a8fe708876b39a8ec585b',\n", - " '92e2c9c1ce52a2c2493b5e22953600735a07553f7d00a704']}" + "{'project_id': 'edc59eacb29f8321b4fc70b43284110670cfd798da9f0835',\n", + " 'result_token': '99c57eaf488e56dd8a172dbccba891c850d32a9993caf14a',\n", + " 'update_tokens': ['3439a921f03e2d5399040f01ff17394c03a0b55efda350e2',\n", + " '50d034571d13b5de9bdccab09d88bd68c65a6b7ab036b71b']}" ] }, "execution_count": 8, @@ -474,16 +474,14 @@ "name": "stdout", "output_type": "stream", "text": [ - "generating CLKs: 100%|█| 5.00k/5.00k [00:01<00:00, 1.06kclk/s, mean=883, std=33.6]\n", - "\u001b[31mCLK data written to /tmp/tmpj8m1dvxj.json\u001b[0m\n", - "generating CLKs: 100%|█| 5.00k/5.00k [00:01<00:00, 1.30kclk/s, mean=875, std=39.7]\n", - "\u001b[31mCLK data written to /tmp/tmpi2y_ogl9.json\u001b[0m\n" + "\u001b[31mCLK data written to /tmp/tmpovptfm5h.json\u001b[0m\n", + "\u001b[31mCLK data written to /tmp/tmprqlatxwl.json\u001b[0m\n" ] } ], "source": [ - "!clkutil hash \"{a_csv.name}\" horse staple \"{schema.name}\" \"{a_clks.name}\"\n", - "!clkutil hash \"{b_csv.name}\" horse staple \"{schema.name}\" \"{b_clks.name}\"" + "!clkutil hash \"{a_csv.name}\" secret \"{schema.name}\" \"{a_clks.name}\"\n", + "!clkutil hash \"{b_csv.name}\" secret \"{schema.name}\" \"{b_clks.name}\"" ] }, { @@ -629,25 +627,26 @@ "text": [ "State: completed\n", "Stage (2/2): compute similarity scores\n", - "Progress: 1.000%\n" + "Progress: 100.00%\n" ] } ], "source": [ - "for update in clkhash.rest_client.watch_run_status(url, project_id, run_id, credentials['result_token'], timeout=300):\n", + "from clkhash.rest_client import RestClient\n", + "from clkhash.rest_client import format_run_status\n", + "rest_client = RestClient(url)\n", + "for update in rest_client.watch_run_status(project_id, run_id, credentials['result_token'], timeout=300):\n", " clear_output(wait=True)\n", - " print(clkhash.rest_client.format_run_status(update))\n", - "time.sleep(3)" + " print(format_run_status(update))" ] }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ - "data = json.loads(clkhash.rest_client.run_get_result_text(\n", - " url, \n", + "data = json.loads(rest_client.run_get_result_text(\n", " project_id, \n", " run_id, \n", " credentials['result_token']))['similarity_scores']" @@ -664,7 +663,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 15, "metadata": { "pycharm": { "is_executing": false @@ -704,7 +703,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 16, "metadata": { "pycharm": { "is_executing": false @@ -714,10 +713,10 @@ { "data": { "text/plain": [ - "1572906" + "1150393" ] }, - "execution_count": 19, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -737,7 +736,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 17, "metadata": { "pycharm": { "is_executing": false @@ -746,7 +745,7 @@ "outputs": [ { "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYAAAAD8CAYAAAB+UHOxAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAElFJREFUeJzt3W+QnWd53/HvD9kmbSG1HG89RhJZN4hpxYsIujWmKY0Lgy3saQVtSkynQbieKpnYM2EmeSGSF05JPeO0BQYmxFMnVjFMwHESUjSxUqM4MDQdjC2DMZZVx4sRYynCViJD8DClkXP1xbkFJ2JXe3b37Dla39/PzJl9zvX8OfelI52fnj/n2VQVkqT+vGjaA5AkTYcBIEmdMgAkqVMGgCR1ygCQpE4ZAJLUKQNAkjplAEhSpwwASerUedMewNlcfPHFNTs7O+1hSNK68tBDD/15Vc0stdw5HQCzs7McPHhw2sOQpHUlyddGWc5DQJLUKQNAkjplAEhSpwwASeqUASBJnTIAJKlTSwZAkh9I8kCSLyU5lOQ/tvplST6fZD7Jbye5oNVf3J7Pt/mzQ9t6d6s/nuTqtWpKkrS0UfYAvgO8oap+FNgO7EhyBfCrwPur6hXAs8ANbfkbgGdb/f1tOZJsA64DXgXsAH49yYZxNiNJGt2SAVADz7Wn57dHAW8AfrfV7wTe0qZ3tue0+W9Mkla/q6q+U1VfBeaBy8fShSRp2Ub6JnD7n/pDwCuADwFfAb5RVafaIkeBTW16E/AUQFWdSvJN4Ida/f6hzQ6vsyZm99yzYP3Irdeu5ctK0row0kngqnq+qrYDmxn8r/0frNWAkuxOcjDJwRMnTqzVy0hS95Z1FVBVfQP4NPA64MIkp/cgNgPH2vQxYAtAm/93gb8Yri+wzvBr3F5Vc1U1NzOz5L2MJEkrNMpVQDNJLmzTfwt4E3CYQRD8RFtsF/DJNr2vPafN/+Oqqla/rl0ldBmwFXhgXI1IkpZnlHMAlwJ3tvMALwLurqo/SPIYcFeS/wR8EbijLX8H8NEk88BJBlf+UFWHktwNPAacAm6squfH244kaVRLBkBVPQK8eoH6kyxwFU9V/V/g3yyyrVuAW5Y/TEnSuPlNYEnqlAEgSZ0yACSpUwaAJHXKAJCkThkAktQpA0CSOmUASFKnDABJ6pQBIEmdMgAkqVMGgCR1ygCQpE4ZAJLUKQNAkjplAEhSpwwASeqUASBJnTIAJKlTBoAkdcoAkKROGQCS1CkDQJI6ZQBIUqcMAEnqlAEgSZ1aMgCSbEny6SSPJTmU5Oda/ZeTHEvycHtcM7TOu5PMJ3k8ydVD9R2tNp9kz9q0JEkaxXkjLHMK+Pmq+kKSlwIPJTnQ5r2/qv7r8MJJtgHXAa8CXgb8UZJXttkfAt4EHAUeTLKvqh4bRyPLMbvnngXrR269dsIjkaTpWTIAquo4cLxNfyvJYWDTWVbZCdxVVd8BvppkHri8zZuvqicBktzVlp14AEiSlnkOIMks8Grg8610U5JHkuxNsrHVNgFPDa12tNUWq5/5GruTHExy8MSJE8sZniRpGUYOgCQvAX4PeFdV/SVwG/AjwHYGewjvHceAqur2qpqrqrmZmZlxbFKStIBRzgGQ5HwGH/6/VVWfAKiqp4fm/wbwB+3pMWDL0OqbW42z1CVJEzbKVUAB7gAOV9X7huqXDi32VuDRNr0PuC7Ji5NcBmwFHgAeBLYmuSzJBQxOFO8bTxuSpOUaZQ/gx4CfAr6c5OFW+0Xg7Um2AwUcAX4aoKoOJbmbwcndU8CNVfU8QJKbgHuBDcDeqjo0xl4kScswylVAfwJkgVn7z7LOLcAtC9T3n209SdLk+E1gSeqUASBJnTIAJKlTBoAkdcoAkKROGQCS1CkDQJI6ZQBIUqcMAEnqlAEgSZ0yACSpUwaAJHXKAJCkThkAktQpA0CSOmUASFKnDABJ6pQBIEmdMgAkqVMGgCR1ygCQpE4ZAJLUqfOmPYBzyeyeexasH7n12gmPRJLWnnsAktQpA0CSOrVkACTZkuTTSR5LcijJz7X6RUkOJHmi/dzY6knywSTzSR5J8pqhbe1qyz+RZNfatSVJWsooewCngJ+vqm3AFcCNSbYBe4D7qmorcF97DvBmYGt77AZug0FgADcDrwUuB24+HRqSpMlbMgCq6nhVfaFNfws4DGwCdgJ3tsXuBN7SpncCH6mB+4ELk1wKXA0cqKqTVfUscADYMdZuJEkjW9Y5gCSzwKuBzwOXVNXxNuvrwCVtehPw1NBqR1ttsbokaQpGDoAkLwF+D3hXVf3l8LyqKqDGMaAku5McTHLwxIkT49ikJGkBIwVAkvMZfPj/VlV9opWfbod2aD+fafVjwJah1Te32mL1v6Gqbq+quaqam5mZWU4vkqRlGOUqoAB3AIer6n1Ds/YBp6/k2QV8cqj+jnY10BXAN9uhonuBq5JsbCd/r2o1SdIUjPJN4B8Dfgr4cpKHW+0XgVuBu5PcAHwNeFubtx+4BpgHvg1cD1BVJ5P8CvBgW+49VXVyLF1IkpZtyQCoqj8BssjsNy6wfAE3LrKtvcDe5QxQkrQ2/CawJHXKAJCkThkAktQpA0CSOmUASFKnDABJ6pQBIEmdMgAkqVMGgCR1ygCQpE4ZAJLUKQNAkjplAEhSp0a5HXT3Zvfcs2D9yK3XTngkkjQ+7gFIUqcMAEnqlAEgSZ0yACSpUwaAJHXKAJCkThkAktQpA0CSOmUASFKnDABJ6pQBIEmdWjIAkuxN8kySR4dqv5zkWJKH2+OaoXnvTjKf5PEkVw/Vd7TafJI9429FkrQco+wBfBjYsUD9/VW1vT32AyTZBlwHvKqt8+tJNiTZAHwIeDOwDXh7W1aSNCVL3g20qj6bZHbE7e0E7qqq7wBfTTIPXN7mzVfVkwBJ7mrLPrbsEUuSxmI15wBuSvJIO0S0sdU2AU8NLXO01RarS5KmZKUBcBvwI8B24Djw3nENKMnuJAeTHDxx4sS4NitJOsOKAqCqnq6q56vqr4Hf4HuHeY4BW4YW3dxqi9UX2vbtVTVXVXMzMzMrGZ4kaQQr+o1gSS6tquPt6VuB01cI7QM+luR9wMuArcADQICtSS5j8MF/HfBvVzPwc4G/KUzSerZkACT5OHAlcHGSo8DNwJVJtgMFHAF+GqCqDiW5m8HJ3VPAjVX1fNvOTcC9wAZgb1UdGns3kqSRjXIV0NsXKN9xluVvAW5ZoL4f2L+s0UmS1ozfBJakThkAktQpA0CSOmUASFKnDABJ6pQBIEmdMgAkqVMGgCR1ygCQpE4ZAJLUKQNAkjplAEhSpwwASeqUASBJnTIAJKlTBoAkdcoAkKROGQCS1CkDQJI6ZQBIUqcMAEnq1HnTHsAL0eyeexasH7n12gmPRJIW5x6AJHXKAJCkThkAktQpA0CSOrVkACTZm+SZJI8O1S5KciDJE+3nxlZPkg8mmU/ySJLXDK2zqy3/RJJda9OOJGlUo+wBfBjYcUZtD3BfVW0F7mvPAd4MbG2P3cBtMAgM4GbgtcDlwM2nQ0OSNB1LBkBVfRY4eUZ5J3Bnm74TeMtQ/SM1cD9wYZJLgauBA1V1sqqeBQ7w/aEiSZqglZ4DuKSqjrfprwOXtOlNwFNDyx1ttcXq3yfJ7iQHkxw8ceLECocnSVrKqk8CV1UBNYaxnN7e7VU1V1VzMzMz49qsJOkMKw2Ap9uhHdrPZ1r9GLBlaLnNrbZYXZI0JSsNgH3A6St5dgGfHKq/o10NdAXwzXao6F7gqiQb28nfq1pNkjQlS94LKMnHgSuBi5McZXA1z63A3UluAL4GvK0tvh+4BpgHvg1cD1BVJ5P8CvBgW+49VXXmiWVJ0gRlcAj/3DQ3N1cHDx5c8fqL3ZTtXONN4iSNU5KHqmpuqeX8JrAkdcoAkKROGQCS1CkDQJI6ZQBIUqcMAEnqlAEgSZ0yACSpUwaAJHXKAJCkThkAktQpA0CSOmUASFKnlrwdtNbeYnct9S6hktaSewCS1CkDQJI6ZQBIUqcMAEnqlAEgSZ0yACSpUwaAJHXKAJCkThkAktQpA0CSOmUASFKnVnUvoCRHgG8BzwOnqmouyUXAbwOzwBHgbVX1bJIAHwCuAb4NvLOqvrCa13+h8x5BktbSOPYA/nlVba+qufZ8D3BfVW0F7mvPAd4MbG2P3cBtY3htSdIKrcUhoJ3AnW36TuAtQ/WP1MD9wIVJLl2D15ckjWC1AVDAp5I8lGR3q11SVcfb9NeBS9r0JuCpoXWPtpokaQpW+/sA/mlVHUvy94ADSf7P8MyqqiS1nA22INkN8PKXv3yVw5MkLWZVewBVdaz9fAb4feBy4OnTh3baz2fa4seALUOrb261M7d5e1XNVdXczMzMaoYnSTqLFQdAkr+T5KWnp4GrgEeBfcCuttgu4JNteh/wjgxcAXxz6FCRJGnCVnMI6BLg9wdXd3Ie8LGq+p9JHgTuTnID8DXgbW35/QwuAZ1ncBno9at4bUnSKq04AKrqSeBHF6j/BfDGBeoF3LjS15MkjZe/FH4dWuwLYuCXxCSNzltBSFKnDABJ6pQBIEmdMgAkqVMGgCR1ygCQpE55GegLjL9DQNKo3AOQpE4ZAJLUKQNAkjplAEhSpzwJ3AlPDks6k3sAktQpA0CSOmUASFKnDABJ6pQBIEmd8iqgznl1kNQv9wAkqVPuAWhB7hlIL3zuAUhSpwwASeqUh4C0LB4akl443AOQpE5NfA8gyQ7gA8AG4Der6tZJj0Hj556BtP5MNACSbAA+BLwJOAo8mGRfVT02yXFocgwG6dw16T2Ay4H5qnoSIMldwE7AAOiMwSBN36QDYBPw1NDzo8BrJzwGncMWC4aVMEykszvnrgJKshvY3Z4+l+TxVWzuYuDPVz+qdaW3nhftN7864ZFMTm/vMdjzcv3wKAtNOgCOAVuGnm9ute+qqtuB28fxYkkOVtXcOLa1XvTWc2/9gj33YhI9T/oy0AeBrUkuS3IBcB2wb8JjkCQx4T2AqjqV5CbgXgaXge6tqkOTHIMkaWDi5wCqaj+wf0IvN5ZDSetMbz331i/Ycy/WvOdU1Vq/hiTpHOStICSpU+syAJLsSPJ4kvkkexaY/8NJ7kvySJLPJNk8NG9XkifaY9dkR75yK+05yfYkn0tyqM37ycmPfmVW8z63+T+Y5GiSX5vcqFdnlX+3X57kU0kOJ3ksyewkx75Sq+z5P7e/24eTfDBJJjv65UuyN8kzSR5dZH5aL/Ot59cMzRvv51dVrasHg5PHXwH+PnAB8CVg2xnL/A6wq02/Afhom74IeLL93NimN067pzXu+ZXA1jb9MuA4cOG0e1rLnofmfwD4GPBr0+5nEj0DnwHe1KZfAvztafe0lj0D/wT4320bG4DPAVdOu6cRev5nwGuARxeZfw3wh0CAK4DPt/rYP7/W4x7Ad28nUVX/Dzh9O4lh24A/btOfHpp/NXCgqk5W1bPAAWDHBMa8Wivuuar+tKqeaNN/BjwDzExk1KuzmveZJP8IuAT41ATGOi4r7jnJNuC8qjoAUFXPVdW3JzPsVVnN+1zADzAIjhcD5wNPr/mIV6mqPgucPMsiO4GP1MD9wIVJLmUNPr/WYwAsdDuJTWcs8yXgX7XptwIvTfJDI657LlpNz9+V5HIG/1i+skbjHKcV95zkRcB7gV9Y81GO12re51cC30jyiSRfTPJf2s0Xz3Ur7rmqPscgEI63x71VdXiNxzsJi/2ZjP3zaz0GwCh+AfjxJF8EfpzBt42fn+6Q1txZe27/g/gocH1V/fV0hjh2i/X8s8D+qjo6zcGtkcV6Pg94fZv/jxkcUnnnlMY4bgv2nOQVwD9kcEeBTcAbkrx+esNcf865ewGNYJTbSfwZ7X8MSV4C/Ouq+kaSY8CVZ6z7mbUc7JisuOf2/AeBe4BfaruU68Fq3ufXAa9P8rMMjoVfkOS5qvq+E4znmNX0fBR4uL53p93/weD48R2TGPgqrKbn/wDcX1XPtXl/CLwO+F+TGPgaWuzPZPyfX9M+IbKCEyjnMTj5cRnfO2n0qjOWuRh4UZu+BXjP0EmUrzI4gbKxTV807Z7WuOcLgPuAd027j0n1fMYy72T9nARezfu8oS0/057/d+DGafe0xj3/JPBHbRvnt7/n/2LaPY3Y9yyLnwS+lr95EviBVh/759fU/yBW+Id3DfCnDI5l/1KrvQf4l236J4An2jK/Cbx4aN1/D8y3x/XT7mWtewb+HfBXwMNDj+3T7met3+ehbaybAFhtzwx+0dIjwJeBDwMXTLufteyZQej9N+Awg98p8r5p9zJivx9ncM7irxgcx78B+BngZ9r8MPjFWV9p7+Xc0Lpj/fzym8CS1KkX6klgSdISDABJ6pQBIEmdMgAkqVMGgCR1ygCQpE4ZAJLUKQNAkjr1/wHNa9U2GtFvqQAAAABJRU5ErkJggg==\n", + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX0AAAD4CAYAAAAAczaOAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAPGElEQVR4nO3df6xfd13H8eeLjmEUyDpbm9lt3KklsfzhnHUMFZkQtm6LFtDwI1HKXKyEkUgif1T5Y2aEpGrAsIALVSobEciMII0rjlohqGGwImPsh9DL6LLWshYL6LJEAd/+8f0UvnT3trf3+6t3n+cj+eZ7vp/zOef7efd7+zrnnnO+56aqkCT14WmzHoAkaXoMfUnqiKEvSR0x9CWpI4a+JHXknFkP4FTWrFlTc3Nzsx6GJK0on/vc575eVWsXmndWh/7c3Bz79++f9TAkaUVJ8shi8zy8I0kdMfQlqSOGviR1xNCXpI4Y+pLUEUNfkjpi6EtSRwx9SeqIoS9JHTmrv5E7qrntdy7YfnDHdVMeiSSdHdzTl6SOGPqS1BFDX5I6YuhLUkcMfUnqiKEvSR0x9CWpI4a+JHXE0Jekjhj6ktQRQ1+SOmLoS1JHDH1J6oihL0kdMfQlqSOGviR1xNCXpI4Y+pLUEUNfkjpi6EtSRwx9SerIaUM/yUVJPpHkwSQPJPm91n5+kr1JDrTn1a09SW5JMp/kviSXDa1ra+t/IMnWyZUlSVrIUvb0vwP8flVtBK4AbkyyEdgO7KuqDcC+9hrgGmBDe2wDboXBRgK4CXg+cDlw04kNhSRpOk4b+lV1pKr+rU3/N/AQsB7YAtzWut0GvKxNbwFur4G7gfOSXABcDeytquNV9Q1gL7B5rNVIkk7pjI7pJ5kDfhb4DLCuqo60WV8D1rXp9cCjQ4sdam2LtZ/8HtuS7E+y/9ixY2cyPEnSaSw59JM8E/hb4E1V9V/D86qqgBrHgKpqZ1VtqqpNa9euHccqJUnNkkI/ydMZBP5fV9WHW/Nj7bAN7floaz8MXDS0+IWtbbF2SdKULOXqnQDvBR6qqncMzdoNnLgCZyvw0aH217areK4AvtUOA90FXJVkdTuBe1VrkyRNyTlL6POLwG8BX0xyb2v7Q2AHcEeSG4BHgFe2eXuAa4F54AngeoCqOp7krcA9rd/NVXV8LFVIkpbktKFfVf8CZJHZL1mgfwE3LrKuXcCuMxmgJGl8/EauJHXE0Jekjhj6ktQRQ1+SOrKUq3eecua237lg+8Ed1015JJI0Xe7pS1JHDH1J6oihL0kdMfQlqSOGviR1xNCXpI4Y+pLUEUNfkjpi6EtSRwx9SeqIoS9JHTH0Jakjhr4kdcTQl6SOGPqS1BFDX5I6YuhLUkcMfUnqiKEvSR0x9CWpI4a+JHXE0Jekjhj6ktQRQ1+SOmLoS1JHDH1J6oihL0kdMfQlqSOGviR1xNCXpI6cM+sBnE3mtt+5YPvBHddNeSSSNBmn3dNPsivJ0ST3D7X9UZLDSe5tj2uH5v1BkvkkX0py9VD75tY2n2T7+EuRJJ3OUg7vvA/YvED7n1XVpe2xByDJRuDVwPPaMn+eZFWSVcC7gWuAjcBrWl9J0hSd9vBOVX0qydwS17cF+FBV/Q/w1STzwOVt3nxVPQyQ5EOt74NnPGJJ0rKNciL3jUnua4d/Vre29cCjQ30OtbbF2p8kybYk+5PsP3bs2AjDkySdbLmhfyvwk8ClwBHg7eMaUFXtrKpNVbVp7dq141qtJIllXr1TVY+dmE7yF8Dft5eHgYuGul7Y2jhFuyRpSpa1p5/kgqGXLwdOXNmzG3h1kmckuQTYAHwWuAfYkOSSJOcyONm7e/nDliQtx2n39JN8ELgSWJPkEHATcGWSS4ECDgK/C1BVDyS5g8EJ2u8AN1bVd9t63gjcBawCdlXVA2OvRpJ0Sku5euc1CzS/9xT93wa8bYH2PcCeMxqdJGmsvA2DJHXE0Jekjhj6ktQRQ1+SOmLoS1JHDH1J6oihL0kdMfQlqSOGviR1xNCXpI4Y+pLUEUNfkjpi6EtSRwx9SeqIoS9JHVnWn0vszdz2OxdsP7jjuimPRJJG456+JHXE0Jekjhj6ktQRQ1+SOmLoS1JHDH1J6oihL0kdMfQlqSOGviR1xNCXpI4Y+pLUEUNfkjpi6EtSRwx9SeqIoS9JHTH0Jakjhr4kdcTQl6SO+OcSR+CfUZS00rinL0kdMfQlqSOnDf0ku5IcTXL/UNv5SfYmOdCeV7f2JLklyXyS+5JcNrTM1tb/QJKtkylHknQqS9nTfx+w+aS27cC+qtoA7GuvAa4BNrTHNuBWGGwkgJuA5wOXAzed2FBIkqbntKFfVZ8Cjp/UvAW4rU3fBrxsqP32GrgbOC/JBcDVwN6qOl5V3wD28uQNiSRpwpZ7TH9dVR1p018D1rXp9cCjQ/0OtbbF2p8kybYk+5PsP3bs2DKHJ0layMgncquqgBrDWE6sb2dVbaqqTWvXrh3XaiVJLD/0H2uHbWjPR1v7YeCioX4XtrbF2iVJU7Tc0N8NnLgCZyvw0aH217areK4AvtUOA90FXJVkdTuBe1VrkyRN0Wm/kZvkg8CVwJokhxhchbMDuCPJDcAjwCtb9z3AtcA88ARwPUBVHU/yVuCe1u/mqjr55LAkacJOG/pV9ZpFZr1kgb4F3LjIenYBu85odJKksfIbuZLUEUNfkjpi6EtSRwx9SeqIoS9JHTH0Jakjhr4kdcTQl6SOGPqS1BFDX5I6YuhLUkdOe+8dnbm57Xcu2H5wx3VTHokk/SD39CWpI4a+JHXE0Jekjhj6ktQRQ1+SOmLoS1JHDH1J6oihL0kdMfQlqSOGviR1xNCXpI4Y+pLUEUNfkjriXTanyLtvSpo19/QlqSOGviR1xNCXpI4Y+pLUEUNfkjpi6EtSRwx9SeqIoS9JHTH0Jakjhr4kdWSk0E9yMMkXk9ybZH9rOz/J3iQH2vPq1p4ktySZT3JfksvGUYAkaenGsaf/K1V1aVVtaq+3A/uqagOwr70GuAbY0B7bgFvH8N6SpDMwicM7W4Db2vRtwMuG2m+vgbuB85JcMIH3lyQtYtS7bBbw8SQFvKeqdgLrqupIm/81YF2bXg88OrTsodZ2ZKiNJNsY/CbAxRdfPOLwVgbvvilpWkYN/V+qqsNJfgzYm+Tfh2dWVbUNwpK1DcdOgE2bNp3RspKkUxvp8E5VHW7PR4GPAJcDj504bNOej7buh4GLhha/sLVJkqZk2aGf5EeSPOvENHAVcD+wG9jaum0FPtqmdwOvbVfxXAF8a+gwkCRpCkY5vLMO+EiSE+v5QFX9Q5J7gDuS3AA8Aryy9d8DXAvMA08A14/w3pKkZVh26FfVw8DPLND+n8BLFmgv4Mblvp8kaXR+I1eSOmLoS1JHDH1J6oihL0kdMfQlqSOjfiNXE+TtGSSNm3v6ktQRQ1+SOmLoS1JHDH1J6oihL0kdMfQlqSOGviR1xOv0V6DFrt8Hr+GXdGru6UtSRwx9SeqIoS9JHTH0Jakjhr4kdcTQl6SOeMnmU4y3Y5Z0Ku7pS1JHDH1J6oihL0kdMfQlqSOeyO2EJ3glgXv6ktQVQ1+SOmLoS1JHDH1J6ogncjvnCV6pL+7pS1JH3NPXgvwNQHpqck9fkjpi6EtSRzy8ozPiYR9pZTP0NRZuDKSVYeqhn2Qz8E5gFfCXVbVj2mPQ9LgxkM4uUw39JKuAdwMvBQ4B9yTZXVUPTnMcmj03BtJsTHtP/3JgvqoeBkjyIWALYOgLWHxjME5uWNSzaYf+euDRodeHgOcPd0iyDdjWXj6e5EsjvN8a4OsjLL8S9VbzGdebP57QSKant88YrPlMPWexGWfdidyq2gnsHMe6kuyvqk3jWNdK0VvNvdUL1tyLSdU87ev0DwMXDb2+sLVJkqZg2qF/D7AhySVJzgVeDeye8hgkqVtTPbxTVd9J8kbgLgaXbO6qqgcm+JZjOUy0wvRWc2/1gjX3YiI1p6omsV5J0lnIe+9IUkcMfUnqyIoM/SSbk3wpyXyS7QvMf06SfUnuS/LJJBcOzdua5EB7bJ3uyJdvuTUnuTTJp5M80Oa9avqjX55RPuc2/9lJDiV51/RGPZoRf7YvTvLxJA8leTDJ3DTHvlwj1vwn7Wf7oSS3JMl0R3/mkuxKcjTJ/YvMT6tlvtV82dC80fOrqlbUg8EJ4K8APwGcC3wB2HhSn78BtrbpFwPvb9PnAw+359VtevWsa5pwzc8FNrTpHweOAOfNuqZJ1jw0/53AB4B3zbqeadQMfBJ4aZt+JvDDs65pkjUDvwD8a1vHKuDTwJWzrmkJNf8ycBlw/yLzrwU+BgS4AvhMax9Lfq3EPf3v3cqhqv4XOHErh2EbgX9q058Ymn81sLeqjlfVN4C9wOYpjHlUy665qr5cVQfa9H8AR4G1Uxn1aEb5nEnyc8A64ONTGOu4LLvmJBuBc6pqL0BVPV5VT0xn2CMZ5XMu4IcYbCyeATwdeGziIx5RVX0KOH6KLluA22vgbuC8JBcwpvxaiaG/0K0c1p/U5wvAK9r0y4FnJfnRJS57Nhql5u9JcjmD/yBfmdA4x2nZNSd5GvB24M0TH+V4jfI5Pxf4ZpIPJ/l8kj9tNzg82y275qr6NIONwJH2uKuqHprweKdhsX+TseTXSgz9pXgz8KIknwdexOBbv9+d7ZAm7pQ1tz2F9wPXV9X/zWaIY7dYzW8A9lTVoVkObkIWq/kc4IVt/s8zOFzyuhmNcdwWrDnJTwE/zeCb/euBFyd54eyGuTKcdffeWYLT3sqhHcZ4BUCSZwK/XlXfTHIYuPKkZT85ycGOybJrbq+fDdwJvKX9urgSjPI5vwB4YZI3MDi2fW6Sx6vqSScJzzKj1HwIuLe+fwfbv2NwPPi90xj4CEap+XeAu6vq8TbvY8ALgH+exsAnaLF/k/Hk16xPaizjJMg5DE5gXML3T/w876Q+a4Cntem3ATcPnQj5KoOTIKvb9PmzrmnCNZ8L7APeNOs6plXzSX1ex8o5kTvK57yq9V/bXv8VcOOsa5pwza8C/rGt4+nt5/xXZ13TEuueY/ETudfxgydyP9vax5JfMy9+mf9g1wJfZnBs+i2t7Wbg19r0bwAHWp+/BJ4xtOxvA/Ptcf2sa5l0zcBvAt8G7h16XDrreib9OQ+tY8WE/qg1M/jjRPcBXwTeB5w763omWTODDd17gIcY/E2Od8y6liXW+0EG5yC+zeC4/A3A64HXt/lh8MemvtI+y01Dy46cX96GQZI68lQ9kStJWoChL0kdMfQlqSOGviR1xNCXpI4Y+pLUEUNfkjry/11JeywGTfuPAAAAAElFTkSuQmCC\n", "text/plain": [ "
" ] @@ -772,7 +771,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 18, "metadata": { "pycharm": { "is_executing": false @@ -781,7 +780,7 @@ "outputs": [ { "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXoAAAD8CAYAAAB5Pm/hAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAEIpJREFUeJzt3XuMpXV9x/H3h10u9cptS8guOLTStPQi0i3FWqtAbLlYl7aI2KYudNONERMba+q2/aOpqQm0qaixMd2IdTH1Qq0WolihC8ReBF3kDlUWCmG3CKsCLSW2Yr/94/yos+sMc2bOnDkzv32/kpPzPL/nOed8f/PsfM5vfs9zzqaqkCT164BJFyBJGi+DXpI6Z9BLUucMeknqnEEvSZ0z6CWpcwa9JHXOoJekzhn0ktS51ZMuAODII4+sqampSZchSSvKzTff/I2qWjPXfssi6KemptixY8eky5CkFSXJg8Ps59SNJHXOoJekzhn0ktQ5g16SOmfQS1LnDHpJ6pxBL0mdM+glqXMGvSR1bll8MnYUU1s+O2P7AxefvcSVSNLy5Ihekjpn0EtS5wx6SercUEGf5IEkdyS5NcmO1nZ4kmuT3NvuD2vtSfK+JDuT3J7kpHF2QJL07OYzoj+1qk6sqvVtfQuwvaqOB7a3dYAzgePbbTPwgcUqVpI0f6NM3WwAtrXlbcA509ovr4EbgUOTHD3C60iSRjBs0BdwTZKbk2xubUdV1cNt+evAUW15LfDQtMfuam17SbI5yY4kO/bs2bOA0iVJwxj2Ovqfr6rdSX4QuDbJv07fWFWVpObzwlW1FdgKsH79+nk9VpI0vKFG9FW1u90/CnwaOBl45JkpmXb/aNt9N3DMtIeva22SpAmYM+iTPDfJ859ZBn4RuBO4CtjYdtsIXNmWrwLe2K6+OQV4YtoUjyRpiQ0zdXMU8Okkz+z/0ar6+yRfBq5Isgl4EDiv7X81cBawE3gKuHDRq5YkDW3OoK+q+4GXzND+TeD0GdoLuGhRqpMkjcxPxkpS5wx6SeqcQS9JnTPoJalzBr0kdc6gl6TOGfSS1DmDXpI6Z9BLUucMeknqnEEvSZ0z6CWpcwa9JHXOoJekzhn0ktQ5g16SOmfQS1LnDHpJ6pxBL0mdM+glqXMGvSR1zqCXpM4Z9JLUOYNekjpn0EtS5wx6SeqcQS9JnTPoJalzBr0kdc6gl6TOGfSS1Lmhgz7JqiS3JPlMWz8uyU1Jdib5RJKDWvvBbX1n2z41ntIlScOYz4j+rcA909YvAS6tqhcDjwGbWvsm4LHWfmnbT5I0IUMFfZJ1wNnAB9t6gNOAT7ZdtgHntOUNbZ22/fS2vyRpAoYd0b8H+D3gf9v6EcDjVfV0W98FrG3La4GHANr2J9r+e0myOcmOJDv27NmzwPIlSXOZM+iTvAZ4tKpuXswXrqqtVbW+qtavWbNmMZ9akjTN6iH2eTnw2iRnAYcALwDeCxyaZHUbta8Ddrf9dwPHALuSrAZeCHxz0SuXJA1lzhF9Vf1+Va2rqingfOC6qvoN4Hrg3LbbRuDKtnxVW6dtv66qalGrliQNbZTr6N8BvC3JTgZz8Je19suAI1r724Ato5UoSRrFMFM3/6+qbgBuaMv3AyfPsM+3gdctQm2SpEXgJ2MlqXMGvSR1zqCXpM4Z9JLUOYNekjpn0EtS5wx6SeqcQS9JnTPoJalzBr0kdc6gl6TOGfSS1DmDXpI6Z9BLUucMeknqnEEvSZ0z6CWpcwa9JHXOoJekzhn0ktQ5g16SOmfQS1LnDHpJ6pxBL0mdM+glqXOrJ13AuExt+eyM7Q9cfPYSVyJJk+WIXpI6Z9BLUucMeknqnEEvSZ2bM+iTHJLkS0luS3JXkj9u7ccluSnJziSfSHJQaz+4re9s26fG2wVJ0rMZZkT/38BpVfUS4ETgjCSnAJcAl1bVi4HHgE1t/03AY6390rafJGlC5gz6GniyrR7YbgWcBnyytW8DzmnLG9o6bfvpSbJoFUuS5mWoOfokq5LcCjwKXAvcBzxeVU+3XXYBa9vyWuAhgLb9CeCIxSxakjS8oYK+qr5bVScC64CTgR8d9YWTbE6yI8mOPXv2jPp0kqRZzOuqm6p6HLgeeBlwaJJnPlm7DtjdlncDxwC07S8EvjnDc22tqvVVtX7NmjULLF+SNJdhrrpZk+TQtvwDwKuBexgE/rltt43AlW35qrZO235dVdViFi1JGt4w33VzNLAtySoGbwxXVNVnktwNfDzJnwC3AJe1/S8DPpJkJ/At4Pwx1C1JGtKcQV9VtwMvnaH9fgbz9fu2fxt43aJUJ0kamZ+MlaTOGfSS1DmDXpI6Z9BLUucMeknqnEEvSZ0z6CWpcwa9JHXOoJekzhn0ktQ5g16SOmfQS1LnDHpJ6pxBL0mdM+glqXPD/McjXZna8tlZtz1w8dlLWIkkLQ1H9JLUOYNekjpn0EtS5wx6SeqcQS9Jndvvrrp5NrNdkePVOJJWMkf0ktQ5g16SOmfQS1LnDHpJ6pxBL0mdM+glqXMGvSR1zqCXpM4Z9JLUuTmDPskxSa5PcneSu5K8tbUfnuTaJPe2+8Nae5K8L8nOJLcnOWncnZAkzW6YEf3TwO9W1QnAKcBFSU4AtgDbq+p4YHtbBzgTOL7dNgMfWPSqJUlDmzPoq+rhqvpKW/5P4B5gLbAB2NZ22wac05Y3AJfXwI3AoUmOXvTKJUlDmdccfZIp4KXATcBRVfVw2/R14Ki2vBZ4aNrDdrU2SdIEDB30SZ4H/C3wO1X1H9O3VVUBNZ8XTrI5yY4kO/bs2TOfh0qS5mGorylOciCDkP/rqvpUa34kydFV9XCbmnm0te8Gjpn28HWtbS9VtRXYCrB+/fp5vUlIEvjV4sMa5qqbAJcB91TVu6dtugrY2JY3AldOa39ju/rmFOCJaVM8kqQlNsyI/uXAbwJ3JLm1tf0BcDFwRZJNwIPAeW3b1cBZwE7gKeDCRa1YkjQvcwZ9Vf0TkFk2nz7D/gVcNGJdkqRF4idjJalzBr0kdc6gl6TOGfSS1DmDXpI6N9QHpvZ3fihDmqzZfgc1HEf0ktQ5g16SOufUjaTuON26N0f0ktQ5g16SOufUjaT9xv46peOIXpI6Z9BLUucMeknqnHP0kpYNPwE7Ho7oJalzBr0kdc6gl6TOOUcvaWz21+vWlxuDXtJ+r/c3JINe0pLz6pqlZdBLGsqzhXMvI99eeTJWkjpn0EtS55y6GUHvJ3CkYTnnvrw5opekzjmil7QXR+ff08tf7Y7oJalzBr0kdc6gl6TOzTlHn+RDwGuAR6vqJ1rb4cAngCngAeC8qnosSYD3AmcBTwEXVNVXxlP68tXLvJ6kPgwzov8wcMY+bVuA7VV1PLC9rQOcCRzfbpuBDyxOmZKkhZpzRF9VX0gytU/zBuBVbXkbcAPwjtZ+eVUVcGOSQ5McXVUPL1bB0v5usf5i9Oqa/cdC5+iPmhbeXweOastrgYem7bertUmSJmTk6+irqpLUfB+XZDOD6R2OPfbYUcuQNAtH7lroiP6RJEcDtPtHW/tu4Jhp+61rbd+nqrZW1fqqWr9mzZoFliFJmstCR/RXARuBi9v9ldPa35Lk48DPAk84Pz83r9KRNE7DXF75MQYnXo9Msgv4IwYBf0WSTcCDwHlt96sZXFq5k8HllReOoWapK77Ra9yGuermDbNsOn2GfQu4aNSiJDm3rsXjl5pJC+RIXCuFX4EgSZ1zRL8fcOS5tPx5a7kx6LVfMYS1PzLol5An1xZuUgG9ko7ZSqpVS8ugl1iakDSINSkG/TI27lHsswWPUxlSP7zqRpI654heYzXf6Yql+GtF2t8Y9JqRV6dI/TDoJWmeFjIQmuTgyaDXsjLfKRenaKS5GfSaF6d0pJXHoF+BlmPYOrKWli+DviOGraSZeB29JHXOoJekzhn0ktQ5g16SOmfQS1LnvOpGkhbJcr3yzRG9JHXOoJekzhn0ktQ5g16SOmfQS1LnDHpJ6pxBL0mdM+glqXMGvSR1zqCXpM6NJeiTnJHkq0l2JtkyjteQJA1n0YM+ySrgL4AzgROANyQ5YbFfR5I0nHGM6E8GdlbV/VX1P8DHgQ1jeB1J0hDGEfRrgYemre9qbZKkCZjY1xQn2QxsbqtPJvnqAp/qSOAbi1PVxNmX5aeXfoB9WZZyyUh9edEwO40j6HcDx0xbX9fa9lJVW4Gto75Ykh1VtX7U51kO7Mvy00s/wL4sV0vRl3FM3XwZOD7JcUkOAs4HrhrD60iShrDoI/qqejrJW4DPA6uAD1XVXYv9OpKk4Yxljr6qrgauHsdzz2Dk6Z9lxL4sP730A+zLcjX2vqSqxv0akqQJ8isQJKlzyzro5/oqhSQvSrI9ye1Jbkiybp/tL0iyK8n7l67q7zdKP5J8N8mt7Tbxk9oj9uXYJNckuSfJ3UmmlrL2fS20L0lOnXZMbk3y7STnLH0P9qp1lOPyp0nuasflfUmytNXvVeco/bgkyZ3t9vqlrfz7JflQkkeT3DnL9rSf987Wn5OmbduY5N522zhyMVW1LG8MTuTeB/wQcBBwG3DCPvv8DbCxLZ8GfGSf7e8FPgq8f6X2A3hy0sdiEftyA/Dqtvw84DkrtS/T9jkc+NZK7Qvwc8A/t+dYBXwReNUK7MfZwLUMzjs+l8HVfy+Y1DFpNf0CcBJw5yzbzwI+BwQ4Bbhp2r+p+9v9YW35sFFqWc4j+mG+SuEE4Lq2fP307Ul+GjgKuGYJan02I/VjmVlwX9r3Ha2uqmsBqurJqnpqacqe0WIdl3OBz63gvhRwCINgPRg4EHhk7BXPbJR+nAB8oaqerqr/Am4HzliCmmdVVV9gMAiYzQbg8hq4ETg0ydHALwHXVtW3quoxBm9gI/VlOQf9MF+lcBvwq235V4DnJzkiyQHAnwNvH3uVc1twP9r6IUl2JLlx0tMDjNaXHwEeT/KpJLck+bP2BXiTMupxecb5wMfGUuHwFtyXqvoig8B8uN0+X1X3jLne2YxyTG4DzkjynCRHAqey9wc3l6PZ+rvoXyOznIN+GG8HXpnkFuCVDD6B+13gzcDVVbVrksXNw2z9AHhRDT419+vAe5L88IRqHNZsfVkNvKJt/xkGf55fMKEah/Vsx4U2+vpJBp8ZWe5m7EuSFwM/xuAT7GuB05K8YnJlzmnGflTVNQwu6f4XBm+8X2TasdrfTey7boYw51cpVNW/097dkzwP+LWqejzJy4BXJHkzg7ngg5I8WVWT+G78Bfejbdvd7u9PcgPwUgbzmJMwyjHZBdxaVfe3bX/HYF7ysqUofAYjHZfmPODTVfWdMdc6l1GOy28DN1bVk23b54CXAf+4FIXvY9TflXcB72rbPgp8bQlqHsVs/d0NvGqf9htGeqVJnqyY40TGagYnIY7jeydmfnyffY4EDmjL7wLeOcPzXMBkT8YuuB8MTsQcPG2fe9nn5NQK6suqtv+atv5XwEUrsS/Ttt8InDqpPizScXk98A/tOQ4EtgO/vAL7sQo4oi3/FHAng3NCkz42U8x+MvZs9j4Z+6XWfjjwb+33/7C2fPhIdUz6BzHHD+ksBu/K9wF/2NreCby2LZ/bwu9rwAefCcV9nuMCJhj0o/SDwRURd7R/8HcAm1byMQFezeAk2R3Ah4GDVnBfphiMvA6Y9DEZ8d/YKuAvgXuAu4F3r9B+HNLqv5vBG/CJy+CYfIzBeY/vMJhn3wS8CXhT2x4G/0nTfe13Yv20x/4WsLPdLhy1Fj8ZK0mdW+knYyVJczDoJalzBr0kdc6gl6TOGfSS1DmDXpI6Z9BLUucMeknq3P8BpdqoH5C0KWEAAAAASUVORK5CYII=\n", + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAD4CAYAAAAXUaZHAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAARWklEQVR4nO3df4zkdX3H8eeL41cVW0C25DzApYppz7ai3VKstfIjVoS2h61FbKKHkp5GTGqiSdH+obUlwbZiaWxJz0IFIyitWkjFCqLE2gp4KPKz6oFHuOsJp4BKjVbw3T/me2FYdm9md3Zmdz88H8lkv/P5fGfm/dnZe+1nP9/vfC9VhSSpLXstdwGSpKVnuEtSgwx3SWqQ4S5JDTLcJalBey93AQCHHHJITU9PL3cZkrSq3HTTTd+uqqm5+lZEuE9PT7Nly5blLkOSVpUk98zX57KMJDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1aEV8QnUU02d/ct6+beeeMsFKJGnlcOYuSQ0y3CWpQQPDPcn+SW5M8tUktyf5s679yCQ3JNma5KNJ9u3a9+vub+36p8c7BEnSbMPM3H8EnFBVzwOOBk5KcizwHuB9VfVs4EHgzG7/M4EHu/b3dftJkiZoYLhXz8Pd3X26WwEnAP/StV8MnNptb+ju0/WfmCRLVrEkaaCh1tyTrElyM3A/cA1wF/BQVT3S7bIdWNdtrwPuBej6vws8fY7n3JRkS5Itu3btGm0UkqTHGSrcq+rRqjoaOAw4Bvj5UV+4qjZX1UxVzUxNzfkfiUiSFmlBZ8tU1UPA54AXAgcm2X2e/GHAjm57B3A4QNf/M8B3lqRaSdJQhjlbZirJgd32TwEvBe6kF/Kv7HbbCFzRbV/Z3afr/2xV1VIWLUnas2E+oboWuDjJGnq/DC6vqn9LcgfwkSR/AXwFuLDb/0LgQ0m2Ag8Ap4+hbknSHgwM96q6BXj+HO1301t/n93+Q+APlqQ6SdKi+AlVSWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQcNcOGzVmj77k3O2bzv3lAlXIkmT5cxdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQQPDPcnhST6X5I4ktyf54679XUl2JLm5u53c95i3J9ma5GtJXjbOAUiSnmiYS/4+Ary1qr6c5GnATUmu6freV1V/3b9zkvXA6cBzgWcAn0nynKp6dCkLlyTNb+DMvap2VtWXu+3vA3cC6/bwkA3AR6rqR1X1TWArcMxSFCtJGs6C1tyTTAPPB27omt6c5JYkFyU5qGtbB9zb97DtzPHLIMmmJFuSbNm1a9eCC5ckzW/ocE9yAPAx4C1V9T3gAuBZwNHATuC9C3nhqtpcVTNVNTM1NbWQh0qSBhgq3JPsQy/YP1xVHweoqvuq6tGq+gnwAR5betkBHN738MO6NknShAxztkyAC4E7q+q8vva1fbu9Arit274SOD3JfkmOBI4Cbly6kiVJgwxztsyLgNcAtya5uWt7B/DqJEcDBWwD3gBQVbcnuRy4g96ZNmd5powkTdbAcK+qLwCZo+uqPTzmHOCcEeqSJI3AT6hKUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIatPegHZIcDlwCHAoUsLmqzk9yMPBRYBrYBpxWVQ8mCXA+cDLwA+CMqvryeMpfnOmzPzln+7ZzT5lwJZI0HsPM3B8B3lpV64FjgbOSrAfOBq6tqqOAa7v7AC8Hjupum4ALlrxqSdIeDQz3qtq5e+ZdVd8H7gTWARuAi7vdLgZO7bY3AJdUz/XAgUnWLnnlkqR5LWjNPck08HzgBuDQqtrZdX2L3rIN9IL/3r6Hbe/aZj/XpiRbkmzZtWvXAsuWJO3J0OGe5ADgY8Bbqup7/X1VVfTW44dWVZuraqaqZqamphbyUEnSAEOFe5J96AX7h6vq413zfbuXW7qv93ftO4DD+x5+WNcmSZqQgeHenf1yIXBnVZ3X13UlsLHb3ghc0df+2vQcC3y3b/lGkjQBA0+FBF4EvAa4NcnNXds7gHOBy5OcCdwDnNb1XUXvNMit9E6FfN2SVixJGmhguFfVF4DM033iHPsXcNaIdUmSRuAnVCWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUoGGuCilJK5L/2f38nLlLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIs2X6eORdUiucuUtSg5y5S2rOfH+Fz6fFv86duUtSg5y5S1rxFjoTl+EuaQUxxJfOwGWZJBcluT/JbX1t70qyI8nN3e3kvr63J9ma5GtJXjauwiVJ8xtmzf2DwElztL+vqo7ublcBJFkPnA48t3vM3ydZs1TFSpKGMzDcq+rzwANDPt8G4CNV9aOq+iawFThmhPokSYswytkyb05yS7dsc1DXtg64t2+f7V2bJGmCFntA9QLgz4Hqvr4XeP1CniDJJmATwBFHHLHIMiStRh44Hb9Fzdyr6r6qerSqfgJ8gMeWXnYAh/fteljXNtdzbK6qmaqamZqaWkwZkqR5LGrmnmRtVe3s7r4C2H0mzZXApUnOA54BHAXcOHKVkjRGLV5XamC4J7kMOA44JMl24J3AcUmOprcssw14A0BV3Z7kcuAO4BHgrKp6dDylS5LmMzDcq+rVczRfuIf9zwHOGaUoSdJovLaMJDXIcJekBhnuktQgw12SGmS4S1KDvOSvpLHxk6jLx5m7JDXIcJekBhnuktQgw12SGmS4S1KDPFtmCC1eMU5S25y5S1KDnLlL0jz2dJ7+Sv/L3XCX9DirOdD0GMNd0sj8JOrK45q7JDXIcJekBhnuktQg19wlDc219dXDcJeepAzqtrksI0kNMtwlqUEuy4zAa85IWqmcuUtSgwbO3JNcBPw2cH9V/WLXdjDwUWAa2AacVlUPJglwPnAy8APgjKr68nhKlzQMD5w+OQ0zc/8gcNKstrOBa6vqKODa7j7Ay4Gjutsm4IKlKVOStBADZ+5V9fkk07OaNwDHddsXA9cBf9K1X1JVBVyf5MAka6tq51IVLD3ZeaxHw1jsmvuhfYH9LeDQbnsdcG/fftu7tidIsinJliRbdu3atcgyJElzGfmAajdLr0U8bnNVzVTVzNTU1KhlSJL6LDbc70uyFqD7en/XvgM4vG+/w7o2SdIELTbcrwQ2dtsbgSv62l+bnmOB77reLkmTN8ypkJfRO3h6SJLtwDuBc4HLk5wJ3AOc1u1+Fb3TILfSOxXydWOoWZI0wDBny7x6nq4T59i3gLNGLUqSNBo/oSpJDTLcJalBXjhMWiQ/TKSVzHCXBlgt12ZZLXVqMlyWkaQGOXMfA/9cl7TcDHdpifnLXSuB4S4tM9fKNQ6uuUtSg5y5a6zGvUThEog0N8N9BTCgJC01w12aENfWNUmuuUtSg5y5a0VxiUpaGoa7nlT2tDTiLxC1xHBfwZzFTpZr4mqJ4S5JEzDpyZrhPkHODB8z7u+F32s92Xm2jCQ1yJm75rTS1vudiUsL48xdkhrkzP1JwNP/pKW30v66nc2ZuyQ1yJn7k9xSrWW7Ji6tLCOFe5JtwPeBR4FHqmomycHAR4FpYBtwWlU9OFqZWikMcWl1WIplmeOr6uiqmununw1cW1VHAdd29yVJEzSONfcNwMXd9sXAqWN4DUnSHowa7gVcneSmJJu6tkOrame3/S3g0LkemGRTki1JtuzatWvEMiRJ/UY9oPobVbUjyc8C1yT57/7OqqokNdcDq2ozsBlgZmZmzn20MK6HS9ptpHCvqh3d1/uTfAI4Brgvydqq2plkLXD/EtSpPoa4pEEWvSyT5KlJnrZ7G/gt4DbgSmBjt9tG4IpRi5QkLcwoM/dDgU8k2f08l1bVvyf5EnB5kjOBe4DTRi9TkrQQiw73qrobeN4c7d8BThylKEnSaLz8gCQ1yHCXpAYZ7pLUIC8cJklLaKWcquzMXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBYwv3JCcl+VqSrUnOHtfrSJKeaCzhnmQN8HfAy4H1wKuTrB/Ha0mSnmhcM/djgK1VdXdV/R/wEWDDmF5LkjTL3mN63nXAvX33twO/1r9Dkk3Apu7uw0m+tsjXOgT49iIfu9I4lpWplbG0Mg5oaCx5z0hjeeZ8HeMK94GqajOwedTnSbKlqmaWoKRl51hWplbG0so4wLEMY1zLMjuAw/vuH9a1SZImYFzh/iXgqCRHJtkXOB24ckyvJUmaZSzLMlX1SJI3A58G1gAXVdXt43gtlmBpZwVxLCtTK2NpZRzgWAZKVY3jeSVJy8hPqEpSgwx3SWrQig73QZcwSPLMJNcmuSXJdUkOm9X/00m2J3n/5Kqe2yhjSfJokpu727IemB5xHEckuTrJnUnuSDI9ydpnW+xYkhzf937cnOSHSU6d/AgeV+so78tfJrm9e1/+NkkmW/0Tah1lLO9Jclt3e9VkK39CnRcluT/JbfP0p/t+b+3G8oK+vo1JvtHdNi6qgKpakTd6B2LvAn4O2Bf4KrB+1j7/DGzstk8APjSr/3zgUuD9q3kswMPL/X4s0TiuA17abR8APGW1jqVvn4OBB1brWIBfB/6ze441wBeB41bpWE4BrqF3oshT6Z2199PLOJbfBF4A3DZP/8nAp4AAxwI39P1M3d19PajbPmihr7+SZ+7DXMJgPfDZbvtz/f1JfgU4FLh6ArUOMtJYVpBFj6O7ttDeVXUNQFU9XFU/mEzZc1qq9+SVwKdW8VgK2J9ekO4H7APcN/aK5zfKWNYDn6+qR6rqf4FbgJMmUPOcqurz9H7xz2cDcEn1XA8cmGQt8DLgmqp6oKoepPcLa8HjWMnhPtclDNbN2uerwO91268Anpbk6Un2At4LvG3sVQ5n0WPp7u+fZEuS65f5z/9RxvEc4KEkH0/ylSR/1V1gbrmM+p7sdjpw2VgqHN6ix1JVX6QXkDu726er6s4x17sno7wvXwVOSvKUJIcAx/P4D1OuNPONdZjvwUArOdyH8TbgJUm+AryE3qdgHwXeBFxVVduXs7gFmm8sAM+s3seT/xD4myTPWqYahzHfOPYGXtz1/yq9P7vPWKYah7Wn94RulvVL9D7PsdLNOZYkzwZ+gd6nyNcBJyR58fKVOZQ5x1JVVwNXAf9F7xfuF+l7v55slu3aMkMYeAmDqvofut/gSQ4Afr+qHkryQuDFSd5Eb2133yQPV9VyXVd+0WPp+nZ0X+9Och3wfHrrkpM2ynuyHbi5qu7u+v6V3jrjhZMofA4jvSed04BPVNWPx1zrIKO8L38EXF9VD3d9nwJeCPzHJAqfw6j/Vs4Bzun6LgW+PoGaF2u+se4AjpvVft2Cn325DjYMcTBib3oHEo7ksQMrz521zyHAXt32OcC753ieM1j+A6qLHgu9Ayr79e3zDWYdYFol41jT7T/V3f8n4KzV+J709V8PHL+cP1tL8L68CvhM9xz7ANcCv7NKx7IGeHq3/cvAbfSO8yznezPN/AdUT+HxB1Rv7NoPBr7Z/ds/qNs+eMGvvdw/mAO+MSfT+817F/CnXdu7gd/ttl/Zhd3XgX/cHYKznuMMljncRxkLvbMZbu1+yG8FzlyN4+j6XkrvINetwAeBfVfxWKbpzbD2Wu6frRF/vtYA/wDcCdwBnLeKx7J/N4Y76P3iPXqZx3EZveMYP6a3bn4m8EbgjV1/6P2nRnd1/yZm+h77emBrd3vdYl7fyw9IUoNW+wFVSdIcDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUoP8H1xfysAfPXP0AAAAASUVORK5CYII=\n", "text/plain": [ "
" ] @@ -807,7 +806,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 19, "metadata": { "pycharm": { "is_executing": false @@ -832,7 +831,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 20, "metadata": { "pycharm": { "is_executing": false @@ -861,7 +860,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 21, "metadata": { "pycharm": { "is_executing": false @@ -873,16 +872,16 @@ "output_type": "stream", "text": [ "Proportion of exact matches for each field using threshold: 0.999\n", - "given_name 0.93\n", - "surname 0.96\n", - "street_number 0.88\n", - "address_1 0.92\n", - "address_2 0.80\n", - "suburb 0.92\n", - "postcode 0.95\n", + "given_name 0.95\n", + "surname 0.94\n", + "street_number 0.85\n", + "address_1 0.93\n", + "address_2 0.75\n", + "suburb 0.95\n", + "postcode 0.97\n", "state 1.00\n", - "date_of_birth 0.96\n", - "soc_sec_id 0.40\n", + "date_of_birth 0.98\n", + "soc_sec_id 0.38\n", "dtype: float64\n" ] } @@ -902,7 +901,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 22, "metadata": { "pycharm": { "is_executing": false @@ -914,15 +913,15 @@ "output_type": "stream", "text": [ "Proportion of exact matches for each field using threshold: 0.95\n", - "given_name 0.49\n", - "surname 0.57\n", - "street_number 0.81\n", - "address_1 0.55\n", - "address_2 0.44\n", - "suburb 0.70\n", - "postcode 0.84\n", - "state 0.93\n", - "date_of_birth 0.84\n", + "given_name 0.58\n", + "surname 0.59\n", + "street_number 0.73\n", + "address_1 0.67\n", + "address_2 0.53\n", + "suburb 0.71\n", + "postcode 0.89\n", + "state 0.95\n", + "date_of_birth 0.75\n", "soc_sec_id 0.92\n", "dtype: float64\n" ] @@ -931,31 +930,6 @@ "source": [ "look_at_per_field_accuracy(threshold = 0.95, num_samples = 100)" ] - }, - { - "cell_type": "code", - "execution_count": 26, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'0.12.0'" - ] - }, - "execution_count": 26, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { @@ -974,7 +948,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.0" + "version": "3.7.3" } }, "nbformat": 4, diff --git a/docs/tutorial/multiparty-linkage-with-clkhash.ipynb b/docs/tutorial/multiparty-linkage-with-clkhash.ipynb index a6a2ad3d..2e77fea0 100644 --- a/docs/tutorial/multiparty-linkage-with-clkhash.ipynb +++ b/docs/tutorial/multiparty-linkage-with-clkhash.ipynb @@ -21,8 +21,7 @@ "metadata": {}, "outputs": [], "source": [ - "KEY1 = 'correct'\n", - "KEY2 = 'horse'\n", + "SECRET = 'my_secret'\n", "\n", "SERVER = os.getenv(\"SERVER\", \"https://testing.es.data61.xyz\")" ] @@ -65,12 +64,12 @@ "name": "stdout", "output_type": "stream", "text": [ - "keys: correct, horse\n" + "keys: my_secret\n" ] } ], "source": [ - "print(f'keys: {KEY1}, {KEY2}')" + "print(f'keys: {SECRET}')" ] }, { @@ -525,10 +524,10 @@ }, "outputs": [ { - "name": "stderr", + "name": "stdout", "output_type": "stream", "text": [ - "Project created\n" + "\u001b[31mProject created\u001b[0m\r\n" ] } ], @@ -562,21 +561,15 @@ }, "outputs": [ { - "name": "stderr", + "name": "stdout", "output_type": "stream", "text": [ - "\n", - "generating CLKs: 0%| | 0.00/3.23k [00:00\n", " \n", " 0\n", + " msl5\n", + " sydnev\n", " \n", - " peGh\n", - " 395273.665\n", " \n", " \n", " 1\n", + " male\n", + " melbourne\n", " \n", - " sydnev\n", - " 77367.636\n", " \n", " \n", " 2\n", + " male\n", + " canbrrra\n", " \n", - " pertb\n", - " 323383.650\n", " \n", " \n", " 3\n", + " femalr\n", " \n", - " syd1e7y\n", - " 79745.538\n", + " 277039.294\n", " \n", " \n", " 4\n", " \n", " perth\n", - " 28019.494\n", + " 125343.406\n", " \n", " \n", " 5\n", " \n", - " canberra\n", - " 78961.675\n", + " mlebourne\n", + " 56899.522\n", " \n", " \n", " 6\n", - " female\n", - " brisnane\n", + " male\n", + " canberra\n", " \n", " \n", " \n", " 7\n", - " male\n", - " canbetra\n", + " female\n", + " can1>erra\n", " \n", " \n", " \n", " 8\n", - " \n", - " sydme7\n", - " 106849.526\n", + " male\n", + " acbbeera\n", + " 81191.584\n", " \n", " \n", " 9\n", + " maoe\n", + " mesllootrne\n", " \n", - " melbourne\n", - " 68548.966\n", " \n", " \n", "\n", "" ], "text/plain": [ - " gender city income\n", - "0 peGh 395273.665\n", - "1 sydnev 77367.636\n", - "2 pertb 323383.650\n", - "3 syd1e7y 79745.538\n", - "4 perth 28019.494\n", - "5 canberra 78961.675\n", - "6 female brisnane \n", - "7 male canbetra \n", - "8 sydme7 106849.526\n", - "9 melbourne 68548.966" + " gender city income\n", + "0 msl5 sydnev \n", + "1 male melbourne \n", + "2 male canbrrra \n", + "3 femalr 277039.294\n", + "4 perth 125343.406\n", + "5 mlebourne 56899.522\n", + "6 male canberra \n", + "7 female can1>erra \n", + "8 male acbbeera 81191.584\n", + "9 maoe mesllootrne " ] }, "execution_count": 19, @@ -958,21 +940,21 @@ { "data": { "text/plain": [ - "[[[0, 2111], [1, 2100]],\n", - " [[0, 2121], [2, 2131], [1, 2111]],\n", - " [[1, 1146], [2, 1202], [0, 1203]],\n", - " [[1, 2466], [2, 2478], [0, 2460]],\n", - " [[0, 429], [1, 412]],\n", - " [[0, 2669], [1, 1204]],\n", - " [[1, 1596], [2, 1623]],\n", - " [[0, 487], [1, 459]],\n", - " [[1, 1776], [2, 1800], [0, 1806]],\n", - " [[1, 2586], [2, 2602]],\n", - " [[0, 919], [1, 896]],\n", - " [[0, 100], [2, 107], [1, 100]],\n", - " [[0, 129], [1, 131], [2, 135]],\n", - " [[0, 470], [1, 440]],\n", - " [[0, 1736], [1, 1692], [2, 1734]]]" + "[[[0, 2196], [2, 2203]],\n", + " [[1, 399], [2, 401], [0, 414]],\n", + " [[1, 772], [2, 811], [0, 795]],\n", + " [[0, 2868], [2, 2886]],\n", + " [[2, 2335], [0, 948]],\n", + " [[0, 1900], [1, 1866]],\n", + " [[0, 2482], [1, 2494], [2, 2509]],\n", + " [[0, 88], [2, 95], [1, 86]],\n", + " [[0, 1740], [1, 1693], [2, 1736]],\n", + " [[0, 536], [2, 525], [1, 512]],\n", + " [[0, 2489], [1, 2501], [2, 2516]],\n", + " [[0, 1176], [1, 1121]],\n", + " [[1, 1393], [2, 1421], [0, 1451]],\n", + " [[0, 658], [2, 666], [1, 645]],\n", + " [[0, 2317], [2, 2324], [1, 2311]]]" ] }, "execution_count": 20, @@ -1057,34 +1039,25 @@ " \n", " \n", " \n", - " 6426\n", - " 1171\n", - " isabelle\n", - " bridgland\n", - " 30-03-1994\n", - " 04 5318 6471\n", - " mal4\n", + " 6450\n", + " 4201\n", + " cazsandra\n", + " lock\n", + " 20-19-1904\n", + " 08 9282 1556\n", + " femaoe\n", " \n", " \n", - " 6427\n", - " 1171\n", - " isalolIe\n", - " riahgland\n", - " 30-02-1994\n", - " 04 5318 6471\n", - " sydnry\n", + " 6451\n", + " 4201\n", + " cassandra\n", + " lock\n", + " 20-10-1904\n", + " 08 9282 1556\n", + " brisbane\n", " \n", " \n", - " 6428\n", - " 1171\n", - " isabelle\n", - " bridgland\n", - " 30-02-1994\n", - " 04 5318 6471\n", - " 63514.217\n", - " \n", - " \n", - " 6429\n", + " 6452\n", " \n", " \n", " \n", @@ -1093,34 +1066,34 @@ " \n", " \n", " \n", - " 6430\n", - " 1243\n", - " thmoas\n", - " doaldson\n", - " 13-04-1900\n", - " 09 6963 1944\n", - " male\n", + " 6453\n", + " 4914\n", + " runy\n", + " briten\n", + " 01-01-1971\n", + " 03 7339 6523\n", + " malw\n", " \n", " \n", - " 6431\n", - " 1243\n", - " thoma5\n", - " donaldson\n", - " 13-04-1900\n", - " 08 6962 1944\n", - " perth\n", + " 6454\n", + " 4914\n", + " ruby\n", + " britten\n", + " 01-01-1971\n", + " 03 7338 6523\n", + " melbourne\n", " \n", " \n", - " 6432\n", - " 1243\n", - " thomas\n", - " donalsdon\n", - " 13-04-2900\n", - " 08 6963 2944\n", - " 489229.297\n", + " 6455\n", + " 4914\n", + " ruby\n", + " br'ltten\n", + " 01-01-1971\n", + " 03 7337 6523\n", + " 59544.220\n", " \n", " \n", - " 6433\n", + " 6456\n", " \n", " \n", " \n", @@ -1129,25 +1102,34 @@ " \n", " \n", " \n", - " 6434\n", - " 2207\n", - " annah\n", - " aslea\n", - " 02-11-2906\n", - " 04 5501 5973\n", - " male\n", + " 6457\n", + " 2802\n", + " brett\n", + " maynsrd\n", + " 04-09-1968\n", + " 03 3038 1109\n", + " msle\n", " \n", " \n", - " 6435\n", - " 2207\n", - " hannah\n", - " easlea\n", - " 02-11-2006\n", - " 04 5501 5973\n", - " canberra\n", + " 6458\n", + " 2802\n", + " br4tt\n", + " maynard\n", + " 04-09-1867\n", + " 03 3038 2109\n", + " sydney\n", " \n", " \n", - " 6436\n", + " 6459\n", + " 2802\n", + " brett\n", + " maynard\n", + " 04-09-1967\n", + " 03 3038 1109\n", + " 152975.865\n", + " \n", + " \n", + " 6460\n", " \n", " \n", " \n", @@ -1156,34 +1138,34 @@ " \n", " \n", " \n", - " 6437\n", - " 5726\n", - " rhys\n", - " clarke\n", - " 19-05-1929\n", - " 02 9220 9635\n", - " mqle\n", + " 6461\n", + " 7378\n", + " bradley\n", + " campbekl\n", + " 15-07-2009\n", + " 03 2052 7063\n", + " malw\n", " \n", " \n", - " 6438\n", - " 5726\n", - " ry5\n", - " clarke\n", - " 19-05-1939\n", - " 02 9120 9635\n", - " \n", + " 6462\n", + " 7378\n", + " brat\n", + " campbell\n", + " 15-07-2009\n", + " 03 2062 7063\n", + " cnaberfa\n", " \n", " \n", - " 6439\n", - " 5726\n", - " rhys\n", - " klark\n", - " 19-05-2938\n", - " 02 9220 9635\n", - " 118197.119\n", + " 6463\n", + " 7378\n", + " bradely\n", + " campbiol\n", + " 15-07-2009\n", + " 03 2052 7063\n", + " 64924.120\n", " \n", " \n", - " 6440\n", + " 6464\n", " \n", " \n", " \n", @@ -1196,22 +1178,22 @@ "" ], "text/plain": [ - " id given name surname dob phone number non-linking\n", - "6426 1171 isabelle bridgland 30-03-1994 04 5318 6471 mal4\n", - "6427 1171 isalolIe riahgland 30-02-1994 04 5318 6471 sydnry\n", - "6428 1171 isabelle bridgland 30-02-1994 04 5318 6471 63514.217\n", - "6429 \n", - "6430 1243 thmoas doaldson 13-04-1900 09 6963 1944 male\n", - "6431 1243 thoma5 donaldson 13-04-1900 08 6962 1944 perth\n", - "6432 1243 thomas donalsdon 13-04-2900 08 6963 2944 489229.297\n", - "6433 \n", - "6434 2207 annah aslea 02-11-2906 04 5501 5973 male\n", - "6435 2207 hannah easlea 02-11-2006 04 5501 5973 canberra\n", - "6436 \n", - "6437 5726 rhys clarke 19-05-1929 02 9220 9635 mqle\n", - "6438 5726 ry5 clarke 19-05-1939 02 9120 9635 \n", - "6439 5726 rhys klark 19-05-2938 02 9220 9635 118197.119\n", - "6440 " + " id given name surname dob phone number non-linking\n", + "6450 4201 cazsandra lock 20-19-1904 08 9282 1556 femaoe\n", + "6451 4201 cassandra lock 20-10-1904 08 9282 1556 brisbane\n", + "6452 \n", + "6453 4914 runy briten 01-01-1971 03 7339 6523 malw\n", + "6454 4914 ruby britten 01-01-1971 03 7338 6523 melbourne\n", + "6455 4914 ruby br'ltten 01-01-1971 03 7337 6523 59544.220\n", + "6456 \n", + "6457 2802 brett maynsrd 04-09-1968 03 3038 1109 msle\n", + "6458 2802 br4tt maynard 04-09-1867 03 3038 2109 sydney\n", + "6459 2802 brett maynard 04-09-1967 03 3038 1109 152975.865\n", + "6460 \n", + "6461 7378 bradley campbekl 15-07-2009 03 2052 7063 malw\n", + "6462 7378 brat campbell 15-07-2009 03 2062 7063 cnaberfa\n", + "6463 7378 bradely campbiol 15-07-2009 03 2052 7063 64924.120\n", + "6464 " ] }, "execution_count": 22, @@ -1246,7 +1228,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.0" + "version": "3.7.3" } }, "nbformat": 4, From 4d213bf78edd9f6e105b4b817d38ce305a076aa0 Mon Sep 17 00:00:00 2001 From: Guillaume Smith Date: Mon, 18 Nov 2019 13:52:51 +1100 Subject: [PATCH 3/5] Missed a tutorial. --- ...multiparty-linkage-in-entity-service.ipynb | 201 ++++++++---------- 1 file changed, 92 insertions(+), 109 deletions(-) diff --git a/docs/tutorial/multiparty-linkage-in-entity-service.ipynb b/docs/tutorial/multiparty-linkage-in-entity-service.ipynb index 7f9504a3..a5a5e5f6 100644 --- a/docs/tutorial/multiparty-linkage-in-entity-service.ipynb +++ b/docs/tutorial/multiparty-linkage-in-entity-service.ipynb @@ -52,8 +52,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'project_count': 10, 'rate': 20496894, 'status': 'ok'}\n", - "{'anonlink': '0.11.2', 'entityservice': 'v1.11.0', 'python': '3.6.8'}\n" + "{'project_count': 5944, 'rate': 2260983, 'status': 'ok'}\n", + "{'anonlink': '0.12.5', 'entityservice': 'v1.13.0-alpha', 'python': '3.7.5'}\n" ] } ], @@ -87,11 +87,11 @@ "name": "stdout", "output_type": "stream", "text": [ - "project_id: 8eeb1050f5add8f78ff4a0da04219fead48f22220fb0f15e\n", + "project_id: 21d8916332764c00c0861f1dda132c633c731c377fd89696\n", "\n", - "result_token: c8f22b577aac9432871eeea02cbe504d399a9776add1de9f\n", + "result_token: 4b8c53796161aad56414631fd553d5905256ea5cba0476e8\n", "\n", - "update_tokens: ['6bf0f1c84c17116eb9f93cf8a4cfcb13d49d288a1f376dd8', '4b9265070849af1f0546f2adaeaa85a7d0e60b10f9b4afbc', '3ff03cadd750ce1b40cc4ec2b99db0132f62d8687328eeb9', 'c1b562ece6bbef6cd1a0541301bb1f82bd697bce04736296', '8cfdebbe12c65ae2ff20fd0c0ad5de4feb06c9a9dd1209c8']\n" + "update_tokens: ['f3dafb72996cbc0f453f2acde9dd0e037066039d492c96ee', '28c6cb8b3f85bb528574d51c1f67953af7bb9b835b119451', '028b0b1c05b1e669c7b5bf13caf3a53022481d867c3c0fb9', '105c8d242b51f30388f6f8b0bd4d32189127ea760d22377e', '36955c914e3e0d1aed86a5af32027dfb8a8169532ba4125e']\n" ] } ], @@ -143,27 +143,27 @@ "text": [ "Data provider 1: {\n", " \"message\": \"Updated\",\n", - " \"receipt_token\": \"c7d9ba71260863f13af55e12603f8694c29e935262b15687\"\n", + " \"receipt_token\": \"3e102ce587ae97feb18aebf7596aee5ba3ba5b6a41d5bedf\"\n", "}\n", "\n", "Data provider 2: {\n", " \"message\": \"Updated\",\n", - " \"receipt_token\": \"70e4ed1b403c4e628183f82548a9297f8417ca3de94648bf\"\n", + " \"receipt_token\": \"ab758b30126ddc083bf65749773fc5856719b4273adc0703\"\n", "}\n", "\n", "Data provider 3: {\n", " \"message\": \"Updated\",\n", - " \"receipt_token\": \"b56fe568b93dc4522444e503078e16c18573adecbc086b6a\"\n", + " \"receipt_token\": \"e013c252746cbc5ceb00b4009500769ceb63389de886137c\"\n", "}\n", "\n", "Data provider 4: {\n", " \"message\": \"Updated\",\n", - " \"receipt_token\": \"7e3c80e554cfde23847d9aa2cff1323aa8f411e4033c0562\"\n", + " \"receipt_token\": \"f2f38a3206197dd46b53c4c6da079527552d7c6e24b9b63e\"\n", "}\n", "\n", "Data provider 5: {\n", " \"message\": \"Updated\",\n", - " \"receipt_token\": \"8bde91367ee52b5c6804d5ce2d2d3350ce3c3766b8625bbc\"\n", + " \"receipt_token\": \"e489cf14d65b211dd6c8b98b1a902f04e3b09c0e3da21a44\"\n", "}\n", "\n" ] @@ -237,15 +237,15 @@ { "data": { "text/plain": [ - "{'current_stage': {'description': 'waiting for CLKs',\n", - " 'number': 1,\n", - " 'progress': {'absolute': 5,\n", - " 'description': 'number of parties already contributed',\n", - " 'relative': 1.0}},\n", + "{'current_stage': {'description': 'compute similarity scores',\n", + " 'number': 2,\n", + " 'progress': {'absolute': 31440720,\n", + " 'description': 'number of already computed similarity scores',\n", + " 'relative': 0.2984721650891483}},\n", " 'stages': 3,\n", - " 'state': 'queued',\n", - " 'time_added': '2019-06-23T11:17:27.646642+00:00',\n", - " 'time_started': None}" + " 'state': 'running',\n", + " 'time_added': '2019-11-18T02:52:30.352381+00:00',\n", + " 'time_started': '2019-11-18T02:52:30.373760+00:00'}" ] }, "execution_count": 6, @@ -285,12 +285,13 @@ } ], "source": [ - "import clkhash.rest_client\n", "from IPython.display import clear_output\n", - "\n", - "for update in clkhash.rest_client.watch_run_status(SERVER, project_id, run_id, result_token, timeout=30):\n", + "from clkhash.rest_client import RestClient\n", + "from clkhash.rest_client import format_run_status\n", + "rest_client = RestClient(SERVER)\n", + "for update in rest_client.watch_run_status(project_id, run_id, result_token, timeout=300):\n", " clear_output(wait=True)\n", - " print(clkhash.rest_client.format_run_status(update))\n" + " print(format_run_status(update))" ] }, { @@ -315,26 +316,26 @@ { "data": { "text/plain": [ - "[[[0, 3127], [3, 3145], [2, 3152], [1, 3143]],\n", - " [[2, 1653], [3, 1655], [1, 1632], [0, 1673], [4, 1682]],\n", - " [[0, 2726], [1, 2737], [3, 2735]],\n", - " [[1, 837], [3, 864]],\n", - " [[0, 1667], [4, 1676], [1, 1624], [3, 1646]],\n", - " [[1, 1884], [2, 1911], [4, 1926], [0, 1916]],\n", - " [[0, 192], [2, 198]],\n", - " [[3, 328], [4, 330], [0, 350], [2, 351], [1, 345]],\n", - " [[2, 3173], [4, 3176], [3, 3163], [0, 3145], [1, 3161]],\n", - " [[1, 347], [4, 332], [2, 353], [0, 352]],\n", - " [[1, 736], [3, 761], [2, 768], [0, 751], [4, 754]],\n", - " [[1, 342], [2, 349]],\n", - " [[3, 899], [2, 913]],\n", - " [[1, 465], [3, 477]],\n", - " [[0, 285], [1, 293]],\n", - " [[0, 785], [3, 794]],\n", - " [[3, 2394], [4, 2395], [0, 2395]],\n", - " [[1, 1260], [2, 1311], [3, 1281], [4, 1326]],\n", - " [[0, 656], [2, 663]],\n", - " [[1, 2468], [2, 2479]]]" + "[[[0, 287], [2, 293], [4, 277]],\n", + " [[0, 2387], [1, 2386]],\n", + " [[0, 264], [3, 252], [1, 272]],\n", + " [[0, 2496], [4, 2498]],\n", + " [[3, 147], [4, 147]],\n", + " [[3, 815], [4, 812]],\n", + " [[3, 1302], [4, 1343]],\n", + " [[0, 1691], [3, 1674]],\n", + " [[0, 3085], [3, 3117]],\n", + " [[1, 2559], [4, 2545]],\n", + " [[0, 574], [3, 576], [4, 554]],\n", + " [[0, 424], [4, 387]],\n", + " [[1, 1087], [2, 1140]],\n", + " [[1, 468], [2, 489], [3, 482], [4, 469]],\n", + " [[3, 2102], [4, 2115]],\n", + " [[1, 981], [3, 1007]],\n", + " [[0, 696], [3, 704]],\n", + " [[0, 2475], [2, 2501], [1, 2485]],\n", + " [[1, 1034], [2, 1090]],\n", + " [[0, 2785], [4, 2797]]]" ] }, "execution_count": 8, @@ -362,7 +363,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 9, "metadata": { "pycharm": {} }, @@ -371,89 +372,71 @@ "name": "stdout", "output_type": "stream", "text": [ - "0 ['samual', 'mason', '05-12-1917', 'male', 'pertb', '405808.756', '07 2284 3649']\n", - "3 ['samuAl', 'mason', '05-12-1917', 'male', 'peryh', '4058o8.756', '07 2274 3549']\n", - "2 ['samie', 'mazon', '05-12-1917', 'male', '', '405898.756', '07 2275 3649']\n", - "1 ['zamusl', 'mason', '05-12-2917', 'male', '', '405898.756', '07 2274 2649']\n", + "0 ['mackenzie', 'tremellen', '11-01-2947', 'maoe', 'melbourne', '79469.112', '']\n", + "2 ['mackenzie', 'dremellen', '11-01-2937', 'mals', 'mceloburne', '70469.122', '07 5988 5208']\n", + "4 ['macckenzie', 'tremellen', '', 'malr', 'melbovrne', '70469.122', '07 5988 5208']\n", "\n", - "2 ['thomas', 'burfrod', '08-04-1999', '', 'pertj', '182174.209', '02 3881 9666']\n", - "3 ['thomas', 'burfrod', '09-04-1999', 'male', '', '182174.209', '02 3881 9666']\n", - "1 ['thomas', 'burford', '08-04-19o9', 'mal4', '', '182175.109', '02 3881 9666']\n", - "0 ['thomas', 'burford', '08-04-1999', 'male', 'perth', '182174.109', '02 3881 9666']\n", - "4 ['thomas', 'burf0rd', '08-04-q999', 'mske', 'perrh', '182174.109', '02 3881 9666']\n", + "0 ['sophi', 'couljon', '12-03-1841', 'female', 'sydney', '80972.256', '04 3854 3784']\n", + "1 ['sophie', 'coulson', '12-03-1941', 'female', 'sydney', '80972.356', '04 3854 3784']\n", "\n", - "0 ['kaitlin', 'bondza', '03-08-1961', 'male', 'sydney', '41168.999', '02 4632 1380']\n", - "1 ['kaitlin', 'bondja', '03-08-1961', 'malr', 'sydmey', '41168.999', '02 4632 1370']\n", - "3 [\"k'latlin\", 'bonklza', '03-08-1961', 'male', 'sydaney', '', '02 4632 1380']\n", + "0 ['jasmine', 'clarke', '04-00-2009', 'maje', 'melb0urme', '99853.100', '02 1507 1520']\n", + "3 ['jasmine', 'clarke', '04-09-2009', 'male', 'melbourne', '99853.200', '02 1507 1520']\n", + "1 ['jasminr', 'klarle', '04-99-2009', 'male', 'melbourne', '99863.200', '02 1507 1520']\n", "\n", - "1 ['chr8stian', 'jolly', '22-08-2009', 'male', '', '178371.991', '04 5868 7703']\n", - "3 ['chr8stian', 'jolly', '22-09-2099', 'malr', 'melbokurne', '178271.991', '04 5868 7703']\n", + "0 ['zoel', 'ev', '06-09-1990', 'gemale', 'ysdnvvy', '183366.696', '02 5578 4520']\n", + "4 ['joel', 'everett', '06-09-1990', 'female', 'sydney', '183366.696', '02 5578 4520']\n", "\n", - "0 ['oaklrigh', 'ngvyen', '24-07-1907', 'mslr', 'sydney', '63175.398', '04 9019 6235']\n", - "4 ['oakleith', 'ngvyen', '24-97-1907', 'male', 'sydiney', '63175.498', '04 9019 6235']\n", - "1 ['oajleigh', 'ngryen', '24-07-1007', 'male', 'sydney', '63175.498', '04 9919 6235']\n", - "3 ['oakleigh', 'nguyrn', '34-07-1907', 'male', 'sbdeney', '63175.r98', '04 9019 6235']\n", + "3 ['katelyn', 'matthets', '23-07-1977', '', 'melbourne', '118010.996', '07 9265 9238']\n", + "4 ['kateyln', 'matth4ws', '23-07-1978', 'male', 'melbounre', '118010.996', '07 9265 9238']\n", "\n", - "1 ['georgia', 'nguyen', '06-11-1930', 'male', 'perth', '247847.799', '08 6560 4063']\n", - "2 ['georia', 'nfuyen', '06-11-1930', 'male', 'perrh', '247847.799', '08 6560 4963']\n", - "4 ['geortia', 'nguyea', '06-11-1930', 'male', 'pertb', '247847.798', '08 6560 4063']\n", - "0 ['egorgia', 'nguyqn', '06-11-1930', 'male', 'peryh', '247847.799', '08 6460 4963']\n", + "3 ['max', 'pontifex', '17-07-1930', 'male', 'melbourne', '42337.169', '04 8102 3785']\n", + "4 ['max', 'pontjef', '17-07-1930', 'male', 'melbovrne', '', '04 9102 3785']\n", "\n", - "0 ['connor', 'mcneill', '05-09-1902', 'male', 'sydney', '108473.824', '02 6419 9472']\n", - "2 ['connro', 'mcnell', '05-09-1902', 'male', 'sydnye', '108474.824', '02 6419 9472']\n", + "3 ['talrna', 'seilo', '06-09-1953', 'maoe', '', '55815.962', '03 8568 8024']\n", + "4 ['talezba', 'seib', '06-09-1953', 'male', '', '', '03 8567 8024']\n", "\n", - "3 ['alessandria', 'sherriff', '25-91-1951', 'male', 'melb0urne', '5224r.762', '03 3077 2019']\n", - "4 ['alessandria', 'sherriff', '25-01-1951', 'male', 'melbourne', '52245.762', '03 3077 1019']\n", - "0 ['alessandria', \"sherr'lff\", '25-01-1951', 'malr', 'melbourne', '', '03 3977 1019']\n", - "2 ['alessandria', 'shernff', '25-01-1051', 'mzlr', 'melbourne', '52245.663', '03 3077 1019']\n", - "1 ['alessandrya', 'sherrif', '25-01-1961', 'male', 'jkelbouurne', '52245.762', '03 3077 1019']\n", + "0 ['maddiaon', \"mel'ln\", '21-12-1945', 'male', 'melbouren', '', '02 1963 9316']\n", + "3 ['madklidon', 'meJi7|', '21-12-1945', 'maie', 'melbourne', '98312.180', '02 1964 9316']\n", "\n", - "2 ['harriyon', 'micyelmor', '21-04-1971', 'male', 'pert1>', '291889.942', '04 5633 5749']\n", - "4 ['harri5on', 'micyelkore', '21-04-1971', '', 'pertb', '291880.942', '04 5633 5749']\n", - "3 ['hariso17', 'micelmore', '21-04-1971', 'male', 'pertb', '291880.042', '04 5633 5749']\n", - "0 ['harrison', 'michelmore', '21-04-1981', 'malw', 'preth', '291880.942', '04 5643 5749']\n", - "1 ['harris0n', 'michelmoer', '21-04-1971', '', '', '291880.942', '04 5633 5749']\n", + "0 ['holly', 'reih', '22-06-2009', 'msle', 'syconey', '131184.582', '']\n", + "3 ['holly', 'reicl', '21-06-2009', 'male', 'sydey', '131184.582', '']\n", "\n", - "1 ['alannah', 'gully', '15-04-1903', 'make', 'meobourne', '134518.814', '04 5104 4572']\n", - "4 ['alana', 'gully', '15-04-1903', 'male', 'melbourne', '134518.814', '04 5104 4582']\n", - "2 ['alama', 'gulli', '15-04-1903', 'mald', 'melbourne', '134518.814', '04 5104 5582']\n", - "0 ['alsna', 'gullv', '15-04-1903', 'male', '', '134518.814', '04 5103 4582']\n", + "1 ['jessica', 'peteahsen', '30-07-1940', 'malr', 'mel1>oume', '173806.400', '04 7005 4927']\n", + "4 ['jes5ica', 'peter5en', '30-08-1040', 'male', 'melbourne', '173806.400', '04 7005 49q7']\n", "\n", - "1 ['sraah', 'bates-brownsword', '26-11-1905', 'malr', '', '59685.979', '03 8545 5584']\n", - "3 ['sarah', 'bates-brownswort', '26-11-1905', 'male', '', '59686.879', '03 8545 6584']\n", - "2 ['sara0>', 'bates-browjsword', '26-11-1905', 'male', '', '59685.879', '']\n", - "0 ['saran', 'bates-brownsvvord', '26-11-1905', 'malr', 'sydney', '59685.879', '03 8555 5584']\n", - "4 ['snrah', 'bates-bro2nsword', '26-11-1005', 'male', 'sydney', '58685.879', '03 8545 5584']\n", + "0 ['thomas', 'kositcin', '26-08-1939', 'male', 'melbourne', '43048.734', '07 4737 4471']\n", + "3 ['tomas', 'kosutcin', '26-08-1939', 'msle', 'melbourne', '43048.735', '07 4737 4471']\n", + "4 ['thornas', 'kos9tcin', '26-08-1939', 'male', 'melborune', '43948.734', '07 4737 4471']\n", "\n", - "1 ['beth', 'lette', '18-01-2000', 'female', 'sydney', '179719.049', '07 1868 6031']\n", - "2 ['beth', 'lette', '18-02-2000', 'femal4', 'stdq7ey', '179719.049', '07 1868 6931']\n", + "0 ['sofie', 'ny', '20-10-1933', 'fenale', '', '135685.300', '07 7905 6885']\n", + "4 ['stofia', 'ny', '20-10-q933', 'female', 'sydnev', '135685.300', '07 7905 6885']\n", "\n", - "3 ['tahlia', 'bishlp', '', 'female', 'sydney', '101203.290', '03 886u 1916']\n", - "2 ['ahlia', 'bishpp', '', 'female', 'syriey', '101204.290', '03 8867 1916']\n", + "1 ['sophie', 'mazx9ne', '25-03-2814', 'make', 'melbourne', '36878.525', '08 3679 2653']\n", + "2 ['sofie', 'mazzone', '25-03-2924', 'mals', 'melbourne', '36878.526', '08 3678 2653']\n", "\n", - "1 ['fzachary', 'mydlalc', '20-95-1916', 'male', 'sydney', '121209.129', '08 3807 4717']\n", - "3 ['zachary', 'mydlak', '20-05-1016', 'malr', 'sydhey', '121200.129', '08 3807 4627']\n", + "1 ['stephnaie', 'goldsworthy', '03-06-1958', '', 'canbrrra', '83372.67q', '02 4093 4044']\n", + "2 ['sttepbanie', 'goldsworthy', '03-06-1958', 'mald', 'canbedra', '83372.772', '02 4093 4044']\n", + "3 ['stefanie', 'goldsworthy', '03-06-1958', 'male', 'camberra', '83372.572', '']\n", + "4 ['stefanie', 'go|dsworthy', '03-06-1958', '', 'cabr:erra', '83372.672', '02 4093 4044']\n", "\n", - "0 ['jessica', 'white', '04-07-1979', 'male', 'perth', '385632.266', '04 8026 8748']\n", - "1 ['jezsica', 'whi5e', '05-07-1979', 'male', 'perth', '385632.276', '04 8026 8748']\n", + "3 ['antony', 'riean', '18-01-1908', 'male', 'canberra', '59633.334', '07 2734 8270']\n", + "4 ['anthnoy', 'ryari', '18-01-1908', 'male', 'cajberra', '58633.434', '07 2734 8370']\n", "\n", - "0 ['beriiamin', 'musoluno', '21-0y-1994', 'female', 'sydney', '81857.391', '08 8870 e498']\n", - "3 ['byenzakin', 'musoljno', '21-07-1995', 'female', 'sydney', '81857.392', '']\n", + "1 ['eiahn', 'greeti', '11-0e-1977', 'male', 'melbourne', '68538.966', '03 8798 1825']\n", + "3 ['eirn', 'kreen', '11-04-1977', 'male', 'meluourne', '68548.95y', '03 8798 1825']\n", "\n", - "3 ['ella', 'howie', '26-03-2003', 'male', 'melbourne', '97556.316', '03 3655 1171']\n", - "4 ['ela', 'howie', '26-03-2003', 'male', 'melboirne', '', '03 3555 1171']\n", - "0 ['lela', 'howie', '26-03-2903', 'male', 'melbourhe', '', '03 3655 1171']\n", + "0 ['aleesga', 'nkuyen', '14-06-1068', 'male', 'melbourrie', '122053.275', '02 6678 5223']\n", + "3 ['aleeSa', 'nguyen', '14-o6-1968', 'male', 'mtelbournr', '122053.265', '02 6678 5223']\n", "\n", - "1 ['livia', 'riaj', '13-03-1907', 'malw', 'melbovrne', '73305.107', '07 3846 2530']\n", - "2 ['livia', 'ryank', '13-03-1907', 'malw', 'melbuorne', '73305.107', '07 3946 2630']\n", - "3 ['ltvia', 'ryan', '13-03-1907', 'maoe', 'melbourne', '73305.197', '07 3046 2530']\n", - "4 ['livia', 'ryan', '13-03-1907', 'male', 'melbourne', '73305.107', '07 3946 2530']\n", + "0 ['benjamin', 'bishop', '25-11-1980', 'male', 'sydney', '95170.703', '04 3415 3977']\n", + "2 [\"benzam'ln\", 'bish9p', '25-11-1980', 'msle', 'sydn3v', '95170.703', '04 3415 3977']\n", + "1 ['bennie', 'bishop', '25-11-1980', 'mald', '', '95180.703', '04 3415 3977']\n", "\n", - "0 ['coby', 'ibshop', '', 'msle', 'sydney', '211655.118', '02 0833 7777']\n", - "2 ['coby', 'bishop', '15-08-1948', 'male', 'sydney', '211655.118', '02 9833 7777']\n", + "1 [\"ke'Irx\", 'chappel', '19-05-1966', 'male', '', '138869.396', '']\n", + "2 ['keira', 'chapepl', '19-05-1966', 'male', '', '148869.296', '']\n", "\n", - "1 ['emjkly', 'pareemore', '01-03-2977', 'female', 'rnelbourne', '1644487.925', '03 5761 5483']\n", - "2 ['emiily', 'parremore', '01-03-1977', 'female', 'melbourne', '1644487.925', '03 5761 5483']\n", + "0 ['deagxan', 'zaffino', '22-01-1979', 'femame', 'sydne7', '99746.221', '04 1534 02e5']\n", + "4 ['teagan', 'zaffino', '22-01-1979', 'female', 'sydney', '99746.221', '04 1534 0225']\n", "\n" ] } @@ -498,7 +481,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 10, "metadata": { "pycharm": {} }, @@ -538,7 +521,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.0" + "version": "3.7.3" }, "pycharm": { "stem_cell": { From 71c3153cf25b1a5ddf9aaf2258801ecbdd8ec840 Mon Sep 17 00:00:00 2001 From: Guillaume Smith Date: Mon, 18 Nov 2019 14:32:33 +1100 Subject: [PATCH 4/5] Pin anonlink version. --- docs/tutorial/tutorial-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/tutorial/tutorial-requirements.txt b/docs/tutorial/tutorial-requirements.txt index dfcba3ac..5d2f8bf8 100644 --- a/docs/tutorial/tutorial-requirements.txt +++ b/docs/tutorial/tutorial-requirements.txt @@ -1,4 +1,4 @@ -clkhash>=0.15.0 +clkhash==0.15.0 ipython matplotlib recordlinkage From 9975451e93a3a733efdddf6732d2c36882d9e884 Mon Sep 17 00:00:00 2001 From: Guillaume Smith Date: Tue, 19 Nov 2019 09:28:02 +1100 Subject: [PATCH 5/5] Update all the schemas in docs and tutorials to v3. --- docs/tutorial/Permutations.ipynb | 281 ++++-- docs/tutorial/Record Linkage API.ipynb | 42 +- docs/tutorial/Similarity Scores.ipynb | 208 ++++- docs/tutorial/data/schema.json | 110 ++- docs/tutorial/data/schema_ABC.json | 49 +- .../multiparty-linkage-with-clkhash.ipynb | 830 +++--------------- 6 files changed, 652 insertions(+), 868 deletions(-) diff --git a/docs/tutorial/Permutations.ipynb b/docs/tutorial/Permutations.ipynb index de1ff6bb..e30adabc 100644 --- a/docs/tutorial/Permutations.ipynb +++ b/docs/tutorial/Permutations.ipynb @@ -82,7 +82,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{\"project_count\": 5938, \"rate\": 2118828, \"status\": \"ok\"}\r\n" + "{\"project_count\": 6534, \"rate\": 2504556, \"status\": \"ok\"}\r\n" ] } ], @@ -294,26 +294,23 @@ "name": "stdout", "output_type": "stream", "text": [ - "Overwriting /tmp/tmp2d7l4ief\n" + "Overwriting /tmp/tmptm0w938k\n" ] } ], "source": [ "%%writefile {schema.name}\n", "{\n", - " \"version\": 1,\n", + " \"version\": 3,\n", " \"clkConfig\": {\n", " \"l\": 1024,\n", - " \"k\": 30,\n", - " \"hash\": {\n", - " \"type\": \"doubleHash\"\n", - " },\n", + " \"xor_folds\": 0,\n", " \"kdf\": {\n", " \"type\": \"HKDF\",\n", " \"hash\": \"SHA256\",\n", - " \"info\": \"c2NoZW1hX2V4YW1wbGU=\",\n", - " \"salt\": \"SCbL2zHNnmsckfzchsNkZY9XoHk96P/G5nUBrM7ybymlEFsMV6PAeDZCNp3rfNUPCtLDMOGQHG4pCQpfhiHCyA==\",\n", - " \"keySize\": 64\n", + " \"info\": \"c2NoZW1hX2V4YW1wbGU=\",\n", + " \"salt\": \"SCbL2zHNnmsckfzchsNkZY9XoHk96P/G5nUBrM7ybymlEFsMV6PAeDZCNp3rfNUPCtLDMOGQHG4pCQpfhiHCyA==\",\n", + " \"keySize\": 64\n", " }\n", " },\n", " \"features\": [\n", @@ -323,48 +320,189 @@ " },\n", " {\n", " \"identifier\": \"given_name\",\n", - " \"format\": { \"type\": \"string\", \"encoding\": \"utf-8\" },\n", - " \"hashing\": { \"ngram\": 2, \"weight\": 1 }\n", + " \"format\": {\n", + " \"type\": \"string\",\n", + " \"encoding\": \"utf-8\"\n", + " },\n", + " \"hashing\": {\n", + " \"strategy\": {\n", + " \"bitsPerToken\": 30\n", + " },\n", + " \"hash\": {\n", + " \"type\": \"doubleHash\"\n", + " },\n", + " \"comparison\": {\n", + " \"type\": \"ngram\",\n", + " \"n\": 2,\n", + " \"positional\": false\n", + " }\n", + " }\n", " },\n", " {\n", " \"identifier\": \"surname\",\n", - " \"format\": { \"type\": \"string\", \"encoding\": \"utf-8\" },\n", - " \"hashing\": { \"ngram\": 2, \"weight\": 1 }\n", + " \"format\": {\n", + " \"type\": \"string\",\n", + " \"encoding\": \"utf-8\"\n", + " },\n", + " \"hashing\": {\n", + " \"strategy\": {\n", + " \"bitsPerToken\": 30\n", + " },\n", + " \"hash\": {\n", + " \"type\": \"doubleHash\"\n", + " },\n", + " \"comparison\": {\n", + " \"type\": \"ngram\",\n", + " \"n\": 2,\n", + " \"positional\": false\n", + " }\n", + " }\n", " },\n", " {\n", " \"identifier\": \"street_number\",\n", - " \"format\": { \"type\": \"integer\" },\n", - " \"hashing\": { \"ngram\": 1, \"positional\": true, \"weight\": 0.5, \"missingValue\": {\"sentinel\": \"\"} }\n", + " \"format\": {\n", + " \"type\": \"integer\"\n", + " },\n", + " \"hashing\": {\n", + " \"missingValue\": {\n", + " \"sentinel\": \"\"\n", + " },\n", + " \"strategy\": {\n", + " \"bitsPerToken\": 15\n", + " },\n", + " \"hash\": {\n", + " \"type\": \"doubleHash\"\n", + " },\n", + " \"comparison\": {\n", + " \"type\": \"ngram\",\n", + " \"n\": 1,\n", + " \"positional\": true\n", + " }\n", + " }\n", " },\n", " {\n", " \"identifier\": \"address_1\",\n", - " \"format\": { \"type\": \"string\", \"encoding\": \"utf-8\" },\n", - " \"hashing\": { \"ngram\": 2, \"weight\": 0.5 }\n", + " \"format\": {\n", + " \"type\": \"string\",\n", + " \"encoding\": \"utf-8\"\n", + " },\n", + " \"hashing\": {\n", + " \"strategy\": {\n", + " \"bitsPerToken\": 15\n", + " },\n", + " \"hash\": {\n", + " \"type\": \"doubleHash\"\n", + " },\n", + " \"comparison\": {\n", + " \"type\": \"ngram\",\n", + " \"n\": 2,\n", + " \"positional\": false\n", + " }\n", + " }\n", " },\n", " {\n", " \"identifier\": \"address_2\",\n", - " \"format\": { \"type\": \"string\", \"encoding\": \"utf-8\" },\n", - " \"hashing\": { \"ngram\": 2, \"weight\": 0.5 }\n", + " \"format\": {\n", + " \"type\": \"string\",\n", + " \"encoding\": \"utf-8\"\n", + " },\n", + " \"hashing\": {\n", + " \"strategy\": {\n", + " \"bitsPerToken\": 15\n", + " },\n", + " \"hash\": {\n", + " \"type\": \"doubleHash\"\n", + " },\n", + " \"comparison\": {\n", + " \"type\": \"ngram\",\n", + " \"n\": 2,\n", + " \"positional\": false\n", + " }\n", + " }\n", " },\n", " {\n", " \"identifier\": \"suburb\",\n", - " \"format\": { \"type\": \"string\", \"encoding\": \"utf-8\" },\n", - " \"hashing\": { \"ngram\": 2, \"weight\": 0.5 }\n", + " \"format\": {\n", + " \"type\": \"string\",\n", + " \"encoding\": \"utf-8\"\n", + " },\n", + " \"hashing\": {\n", + " \"strategy\": {\n", + " \"bitsPerToken\": 15\n", + " },\n", + " \"hash\": {\n", + " \"type\": \"doubleHash\"\n", + " },\n", + " \"comparison\": {\n", + " \"type\": \"ngram\",\n", + " \"n\": 2,\n", + " \"positional\": false\n", + " }\n", + " }\n", " },\n", " {\n", " \"identifier\": \"postcode\",\n", - " \"format\": { \"type\": \"integer\", \"minimum\": 100, \"maximum\": 9999 },\n", - " \"hashing\": { \"ngram\": 1, \"positional\": true, \"weight\": 0.5 }\n", + " \"format\": {\n", + " \"type\": \"integer\",\n", + " \"minimum\": 100,\n", + " \"maximum\": 9999\n", + " },\n", + " \"hashing\": {\n", + " \"strategy\": {\n", + " \"bitsPerToken\": 15\n", + " },\n", + " \"hash\": {\n", + " \"type\": \"doubleHash\"\n", + " },\n", + " \"comparison\": {\n", + " \"type\": \"ngram\",\n", + " \"n\": 1,\n", + " \"positional\": true\n", + " }\n", + " }\n", " },\n", " {\n", " \"identifier\": \"state\",\n", - " \"format\": { \"type\": \"string\", \"encoding\": \"utf-8\", \"maxLength\": 3 },\n", - " \"hashing\": { \"ngram\": 2, \"weight\": 1 }\n", + " \"format\": {\n", + " \"type\": \"string\",\n", + " \"encoding\": \"utf-8\",\n", + " \"maxLength\": 3\n", + " },\n", + " \"hashing\": {\n", + " \"strategy\": {\n", + " \"bitsPerToken\": 30\n", + " },\n", + " \"hash\": {\n", + " \"type\": \"doubleHash\"\n", + " },\n", + " \"comparison\": {\n", + " \"type\": \"ngram\",\n", + " \"n\": 2,\n", + " \"positional\": false\n", + " }\n", + " }\n", " },\n", " {\n", " \"identifier\": \"date_of_birth\",\n", - " \"format\": { \"type\": \"integer\" },\n", - " \"hashing\": { \"ngram\": 1, \"positional\": true, \"weight\": 1, \"missingValue\": {\"sentinel\": \"\"} }\n", + " \"format\": {\n", + " \"type\": \"integer\"\n", + " },\n", + " \"hashing\": {\n", + " \"missingValue\": {\n", + " \"sentinel\": \"\"\n", + " },\n", + " \"strategy\": {\n", + " \"bitsPerToken\": 30\n", + " },\n", + " \"hash\": {\n", + " \"type\": \"doubleHash\"\n", + " },\n", + " \"comparison\": {\n", + " \"type\": \"ngram\",\n", + " \"n\": 1,\n", + " \"positional\": true\n", + " }\n", + " }\n", " },\n", " {\n", " \"identifier\": \"soc_sec_id\",\n", @@ -399,17 +537,17 @@ "name": "stdout", "output_type": "stream", "text": [ - "Credentials will be saved in /tmp/tmp5kxg8nky\n", + "Credentials will be saved in /tmp/tmptneh9xy1\n", "\u001b[31mProject created\u001b[0m\n" ] }, { "data": { "text/plain": [ - "{'project_id': '61ad07b11de00b335e4efdb440d0da061b9e89f0b7d25006',\n", - " 'result_token': 'f75ec8d9142b2ac43c1f43899b663113a260e97a3fc23bca',\n", - " 'update_tokens': ['25df4927eccfb37d11b1ab72dad0abc9b5d9abae9e73f0fc',\n", - " '3aafac5ff5c5024a9f770fb2dc8d87a8ab73a504c0a2a03c']}" + "{'project_id': '12256e29a8ad92c9016ba3e7650888f13d3bfb3bd23cc98a',\n", + " 'result_token': '1a588d384f651e9430ac1bb42196f9fe393ff10e8ec65f48',\n", + " 'update_tokens': ['6111c582a0d6a649480c719adcd258b811da17887849ee00',\n", + " '4239370ce8868a9eb3dc85a85eca243bf593a0cc637a5be8']}" ] }, "execution_count": 7, @@ -464,8 +602,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "\u001b[31mCLK data written to /tmp/tmpdumz7c8u.json\u001b[0m\n", - "\u001b[31mCLK data written to /tmp/tmpq70k1o_9.json\u001b[0m\n" + "\u001b[31mCLK data written to /tmp/tmp9vdauwh4.json\u001b[0m\n", + "\u001b[31mCLK data written to /tmp/tmpgspffags.json\u001b[0m\n" ] } ], @@ -826,7 +964,7 @@ { "data": { "text/plain": [ - "[1704, 3246, 227, 2913, 1848, 2942, 1358, 3469, 2025, 1349]" + "[2418, 3590, 2340, 1226, 1323, 251, 4696, 2598, 4019, 301]" ] }, "execution_count": 20, @@ -860,7 +998,7 @@ { "data": { "text/plain": [ - "[3095, 4173, 2439, 29, 3016, 493, 764, 746, 1981, 2840]" + "[3183, 4293, 3406, 2808, 4528, 2446, 4606, 1601, 1641, 2062]" ] }, "execution_count": 21, @@ -934,16 +1072,16 @@ { "data": { "text/plain": [ - "['rec-126-org,nikki,jaric,13,renmark street,parish lorne,chester hill,2106,nsw,19520401,9272010\\n',\n", - " 'rec-3413-org,amber,durnin,41,hindmarsh drive,walmount,knoxfield,3634,vic,19251010,8760281\\n',\n", - " 'rec-2774-org,dylan,herbert,150,andrews street,brentwood vlge,sheidow park,5067,vic,19430105,4521571\\n',\n", - " 'rec-249-org,lachlan,shepherd,1,grainger circuit,willandra,adaminaby,7250,nsw,19080122,3139543\\n',\n", - " 'rec-4870-org,carlin,garcia,46,von guerard crescent,,clarendon,2140,nsw,19241024,1512461\\n',\n", - " 'rec-1327-org,emma,copperstone,5,decima circuit,crower,avoca north,2166,nsw,19451208,1538637\\n',\n", - " 'rec-959-org,zac,hand,18,kambalda crescent,mckinnon glen,east maitland,4500,sa,19970725,2709860\\n',\n", - " 'rec-2971-org,jack,highet,26,mathieson crescent,,whittington,3064,vic,19650507,6260000\\n',\n", - " \"rec-4637-org,kydan,gigney,64,o'rourke street,timdoolin,shellharbour,4059,nsw,19760409,3279351\\n\",\n", - " 'rec-2003-org,mitchell,webb,15,denovan circuit,villa 3,connewarre,5091,nsw,19801006,8786441\\n']" + "['rec-3933-org,joshua,rigley,19,east place,kergunyah,kingaroy,3665,vic,19670613,4096438\\n',\n", + " 'rec-1057-org,samara,pringle,7,allan street,bonnie doon,campbelltown,5073,nsw,19560429,3493586\\n',\n", + " 'rec-4035-org,chloe,worm,6,brentnall place,donna valley,karloo,3128,nsw,19000814,9383057\\n',\n", + " 'rec-3793-org,lucy,mccarthy,29,charlton street,warrah lea,bundaberg,4061,qld,19940917,6596660\\n',\n", + " 'rec-27-org,angelina,campbell,161,jackie howe crescent,bugoren,woorim,6052,nsw,19531108,8948230\\n',\n", + " 'rec-2303-org,tahlia,hage,3,maclaurin crescent,,ormond,4740,tas,19190517,6174860\\n',\n", + " 'rec-658-org,david,hobson,14,vagabond crescent,dugout 65,patterson lakes,4880,wa,19010305,7666240\\n',\n", + " 'rec-4484-org,alexandra,clarke,15,parnell road,rsdb 284,nedlands,4014,sa,19890608,7235143\\n',\n", + " 'rec-702-org,barnaby,fleet,4,martley circuit,peak view,ascot vale,3930,sa,19360907,9383837\\n',\n", + " 'rec-3252-org,,campbell,4,dunbar street,delicate nobby street,cloverdale,2528,vic,19480406,8607518\\n']" ] }, "execution_count": 24, @@ -967,16 +1105,16 @@ { "data": { "text/plain": [ - "['rec-126-dup-0,nikki,jaruic,13,renmark street,parishlorne,chester hill,2106,nsw,19520401,9272910\\n',\n", - " 'rec-3413-dup-0,amber,durnin,41,hindmarsh drive,walmoutn,knoxfield,3643,vic,19521010,8760281\\n',\n", - " 'rec-2774-dup-0,dylan,herbert,150,andrews street,brentwoo dvlge,sheido wpark,5067,vic,19430105,4521571\\n',\n", - " 'rec-249-dup-0,lachlan,shephaerd,1,grainger circuit,willandra,adaminaby,7250,nsw,19080122,3139543\\n',\n", - " 'rec-4870-dup-0,carlin,munforti,46,von guerard crescent,,clarendon,2140,nsw,19241024,1512461\\n',\n", - " 'rec-1327-dup-0,emma,copperstone,5,decima circuit,crwer,avoca north,2166,nsw,19451208,1536837\\n',\n", - " 'rec-959-dup-0,zac,,18,kambalda crescent,mckinnon glen,east mailand,4500,sa,19970725,2709860\\n',\n", - " 'rec-2971-dup-0,jack,highet,26,mathieson crescent,,kaleen,3064,vic,19650507,6260000\\n',\n", - " \"rec-4637-dup-0,aurora,gigney,64,o'rourke street,timdoolin,shellharbour,4059,nsw,19760409,7169291\\n\",\n", - " 'rec-2003-dup-0,mitchell,,15,denovan circuit,villa 3,,5091,nsw,19801006,8786441\\n']" + "['rec-3933-dup-0,joshua,rigly,19,east place,kergunyah,kingaroy,3665,vic,19670613,4096438\\n',\n", + " 'rec-1057-dup-0,pringle,samara,7,allan street,bonnie doon,campbelltown,5073,nsw,19560429,3493586\\n',\n", + " 'rec-4035-dup-0,chooe,worm,6,brentnal place,donna valley,karloo,3128,nsw,19000814,9383057\\n',\n", + " 'rec-3793-dup-0,mccarthy,lucy,29,charltonstreet,warrahlea,bundaverg,4061,qld,19940917,6596660\\n',\n", + " 'rec-27-dup-0,angelina,campbell,190,jackie howe crescent,bugoren,woorim,6352,nsw,19531108,8948230\\n',\n", + " 'rec-2303-dup-0,peter,ha ge,3,maclaurin crescent,,ormond,4704,tas,19190517,6174860\\n',\n", + " 'rec-658-dup-0,david,hobsson,14,vagabond cfescent,dugout 65,patterson lakes,4880,wa,19010305,7666240\\n',\n", + " 'rec-4484-dup-0,alexandra,clarke,15,rsd b 284,parnell roa,,4014,sa,19890608,7235143\\n',\n", + " 'rec-702-dup-0,barnay,fleet,4,martley circuit,peak view,ascot vale,3930,sa,19360907,9383837\\n',\n", + " 'rec-3252-dup-0,,campbell,4,dunbar svtreet,delicate nobby street,cloverdale,2528,vic,19480406,8607518\\n']" ] }, "execution_count": 25, @@ -1014,16 +1152,16 @@ "name": "stdout", "output_type": "stream", "text": [ - "Nikki Jaric (rec-126-org) =? Nikki Jaruic (rec-126-dup-0)\n", - "Amber Durnin (rec-3413-org) =? Amber Durnin (rec-3413-dup-0)\n", - "Dylan Herbert (rec-2774-org) =? Dylan Herbert (rec-2774-dup-0)\n", - "Lachlan Shepherd (rec-249-org) =? Lachlan Shephaerd (rec-249-dup-0)\n", - "Carlin Garcia (rec-4870-org) =? Carlin Munforti (rec-4870-dup-0)\n", - "Emma Copperstone (rec-1327-org) =? Emma Copperstone (rec-1327-dup-0)\n", - "Zac Hand (rec-959-org) =? Zac (rec-959-dup-0)\n", - "Jack Highet (rec-2971-org) =? Jack Highet (rec-2971-dup-0)\n", - "Kydan Gigney (rec-4637-org) =? Aurora Gigney (rec-4637-dup-0)\n", - "Mitchell Webb (rec-2003-org) =? Mitchell (rec-2003-dup-0)\n" + "Joshua Rigley (rec-3933-org) =? Joshua Rigly (rec-3933-dup-0)\n", + "Samara Pringle (rec-1057-org) =? Pringle Samara (rec-1057-dup-0)\n", + "Chloe Worm (rec-4035-org) =? Chooe Worm (rec-4035-dup-0)\n", + "Lucy Mccarthy (rec-3793-org) =? Mccarthy Lucy (rec-3793-dup-0)\n", + "Angelina Campbell (rec-27-org) =? Angelina Campbell (rec-27-dup-0)\n", + "Tahlia Hage (rec-2303-org) =? Peter Ha Ge (rec-2303-dup-0)\n", + "David Hobson (rec-658-org) =? David Hobsson (rec-658-dup-0)\n", + "Alexandra Clarke (rec-4484-org) =? Alexandra Clarke (rec-4484-dup-0)\n", + "Barnaby Fleet (rec-702-org) =? Barnay Fleet (rec-702-dup-0)\n", + " Campbell (rec-3252-org) =? Campbell (rec-3252-dup-0)\n" ] } ], @@ -1111,6 +1249,15 @@ "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.3" + }, + "pycharm": { + "stem_cell": { + "cell_type": "raw", + "metadata": { + "collapsed": false + }, + "source": [] + } } }, "nbformat": 4, diff --git a/docs/tutorial/Record Linkage API.ipynb b/docs/tutorial/Record Linkage API.ipynb index d3a7a143..b5e074f1 100644 --- a/docs/tutorial/Record Linkage API.ipynb +++ b/docs/tutorial/Record Linkage API.ipynb @@ -94,7 +94,7 @@ { "data": { "text/plain": [ - "{'project_count': 5940, 'rate': 2162326, 'status': 'ok'}" + "{'project_count': 6535, 'rate': 2504556, 'status': 'ok'}" ] }, "execution_count": 3, @@ -261,8 +261,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "generating CLKs: 100%|██████████| 5.00k/5.00k [00:00<00:00, 12.0kclk/s, mean=643, std=45.7]\n", - "generating CLKs: 100%|██████████| 5.00k/5.00k [00:00<00:00, 12.9kclk/s, mean=631, std=52.9]\n" + "generating CLKs: 100%|██████████| 5.00k/5.00k [00:00<00:00, 10.7kclk/s, mean=643, std=45.7]\n", + "generating CLKs: 100%|██████████| 5.00k/5.00k [00:00<00:00, 13.5kclk/s, mean=631, std=52.9]\n" ] } ], @@ -298,10 +298,10 @@ { "data": { "text/plain": [ - "{'project_id': '1a531657c70bbd156aad2b7247c8f06531941692e5eb8f44',\n", - " 'result_token': '45d0d6b4214c85e94d1cbcc739938d8a247fbf5d3d82d9e4',\n", - " 'update_tokens': ['f19c376896083387d6b9d9a7183b0be5457403cd9b74aa8b',\n", - " 'afd80fc82dbfa8c2fe8a195160783bd4968a828cdb02dde8']}" + "{'project_id': '4dc4e94983cb003e6eaf1dd9e09ece6c4f6b142076bf2ca3',\n", + " 'result_token': '0924d34d7fa1a0dc9b3d04b247698d8c070d0896cdb1c7d8',\n", + " 'update_tokens': ['317b702338d3c9cd8f1d4cdf92bbc67e3a0f9ff019886be5',\n", + " 'df27bf6dc03b2d166cd33f4ac806fef096c6d4eb5103c105']}" ] }, "execution_count": 9, @@ -351,7 +351,7 @@ " 'notes': '',\n", " 'number_parties': 2,\n", " 'parties_contributed': 0,\n", - " 'project_id': '1a531657c70bbd156aad2b7247c8f06531941692e5eb8f44',\n", + " 'project_id': '4dc4e94983cb003e6eaf1dd9e09ece6c4f6b142076bf2ca3',\n", " 'result_type': 'groups',\n", " 'schema': {}}" ] @@ -493,9 +493,9 @@ "{'current_stage': {'description': 'compute output', 'number': 3},\n", " 'stages': 3,\n", " 'state': 'completed',\n", - " 'time_added': '2019-11-18T01:07:32.780334+00:00',\n", - " 'time_completed': '2019-11-18T01:07:33.662834+00:00',\n", - " 'time_started': '2019-11-18T01:07:32.803234+00:00'}" + " 'time_added': '2019-11-18T22:23:20.085746+00:00',\n", + " 'time_completed': '2019-11-18T22:23:20.995602+00:00',\n", + " 'time_started': '2019-11-18T22:23:20.226739+00:00'}" ] }, "execution_count": 15, @@ -588,16 +588,16 @@ "name": "stdout", "output_type": "stream", "text": [ - "a[3928] maps to b[3970]\n", - "a[3872] maps to b[2804]\n", - "a[2907] maps to b[2641]\n", - "a[1232] maps to b[1450]\n", - "a[4810] maps to b[3053]\n", - "a[1121] maps to b[3680]\n", - "a[1620] maps to b[3240]\n", - "a[4708] maps to b[3343]\n", - "a[2088] maps to b[2665]\n", - "a[1691] maps to b[3621]\n", + "a[843] maps to b[4886]\n", + "a[1609] maps to b[3419]\n", + "a[35] maps to b[3965]\n", + "a[524] maps to b[4735]\n", + "a[1242] maps to b[1106]\n", + "a[2398] maps to b[4832]\n", + "a[1125] maps to b[1371]\n", + "a[1218] maps to b[725]\n", + "a[1611] maps to b[1985]\n", + "a[1467] maps to b[4683]\n", "...\n" ] } diff --git a/docs/tutorial/Similarity Scores.ipynb b/docs/tutorial/Similarity Scores.ipynb index 6a2eec06..702bd111 100644 --- a/docs/tutorial/Similarity Scores.ipynb +++ b/docs/tutorial/Similarity Scores.ipynb @@ -100,7 +100,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{\"project_count\": 5942, \"rate\": 2177090, \"status\": \"ok\"}\r\n" + "{\"project_count\": 6536, \"rate\": 2530484, \"status\": \"ok\"}\r\n" ] } ], @@ -312,26 +312,23 @@ "name": "stdout", "output_type": "stream", "text": [ - "Overwriting /tmp/tmp4gg3l5ai\n" + "Overwriting /tmp/tmpp5kob1ay\n" ] } ], "source": [ "%%writefile {schema.name}\n", "{\n", - " \"version\": 1,\n", + " \"version\": 3,\n", " \"clkConfig\": {\n", " \"l\": 1024,\n", - " \"k\": 30,\n", - " \"hash\": {\n", - " \"type\": \"doubleHash\"\n", - " },\n", + " \"xor_folds\": 0,\n", " \"kdf\": {\n", " \"type\": \"HKDF\",\n", " \"hash\": \"SHA256\",\n", - " \"info\": \"c2NoZW1hX2V4YW1wbGU=\",\n", - " \"salt\": \"SCbL2zHNnmsckfzchsNkZY9XoHk96P/G5nUBrM7ybymlEFsMV6PAeDZCNp3rfNUPCtLDMOGQHG4pCQpfhiHCyA==\",\n", - " \"keySize\": 64\n", + " \"info\": \"c2NoZW1hX2V4YW1wbGU=\",\n", + " \"salt\": \"SCbL2zHNnmsckfzchsNkZY9XoHk96P/G5nUBrM7ybymlEFsMV6PAeDZCNp3rfNUPCtLDMOGQHG4pCQpfhiHCyA==\",\n", + " \"keySize\": 64\n", " }\n", " },\n", " \"features\": [\n", @@ -341,48 +338,189 @@ " },\n", " {\n", " \"identifier\": \"given_name\",\n", - " \"format\": { \"type\": \"string\", \"encoding\": \"utf-8\" },\n", - " \"hashing\": { \"ngram\": 2, \"weight\": 1 }\n", + " \"format\": {\n", + " \"type\": \"string\",\n", + " \"encoding\": \"utf-8\"\n", + " },\n", + " \"hashing\": {\n", + " \"strategy\": {\n", + " \"bitsPerToken\": 30\n", + " },\n", + " \"hash\": {\n", + " \"type\": \"doubleHash\"\n", + " },\n", + " \"comparison\": {\n", + " \"type\": \"ngram\",\n", + " \"n\": 2,\n", + " \"positional\": false\n", + " }\n", + " }\n", " },\n", " {\n", " \"identifier\": \"surname\",\n", - " \"format\": { \"type\": \"string\", \"encoding\": \"utf-8\" },\n", - " \"hashing\": { \"ngram\": 2, \"weight\": 1 }\n", + " \"format\": {\n", + " \"type\": \"string\",\n", + " \"encoding\": \"utf-8\"\n", + " },\n", + " \"hashing\": {\n", + " \"strategy\": {\n", + " \"bitsPerToken\": 30\n", + " },\n", + " \"hash\": {\n", + " \"type\": \"doubleHash\"\n", + " },\n", + " \"comparison\": {\n", + " \"type\": \"ngram\",\n", + " \"n\": 2,\n", + " \"positional\": false\n", + " }\n", + " }\n", " },\n", " {\n", " \"identifier\": \"street_number\",\n", - " \"format\": { \"type\": \"integer\" },\n", - " \"hashing\": { \"ngram\": 1, \"positional\": true, \"weight\": 1, \"missingValue\": {\"sentinel\": \"\"} }\n", + " \"format\": {\n", + " \"type\": \"integer\"\n", + " },\n", + " \"hashing\": {\n", + " \"missingValue\": {\n", + " \"sentinel\": \"\"\n", + " },\n", + " \"strategy\": {\n", + " \"bitsPerToken\": 30\n", + " },\n", + " \"hash\": {\n", + " \"type\": \"doubleHash\"\n", + " },\n", + " \"comparison\": {\n", + " \"type\": \"ngram\",\n", + " \"n\": 1,\n", + " \"positional\": true\n", + " }\n", + " }\n", " },\n", " {\n", " \"identifier\": \"address_1\",\n", - " \"format\": { \"type\": \"string\", \"encoding\": \"utf-8\" },\n", - " \"hashing\": { \"ngram\": 2, \"weight\": 1 }\n", + " \"format\": {\n", + " \"type\": \"string\",\n", + " \"encoding\": \"utf-8\"\n", + " },\n", + " \"hashing\": {\n", + " \"strategy\": {\n", + " \"bitsPerToken\": 30\n", + " },\n", + " \"hash\": {\n", + " \"type\": \"doubleHash\"\n", + " },\n", + " \"comparison\": {\n", + " \"type\": \"ngram\",\n", + " \"n\": 2,\n", + " \"positional\": false\n", + " }\n", + " }\n", " },\n", " {\n", " \"identifier\": \"address_2\",\n", - " \"format\": { \"type\": \"string\", \"encoding\": \"utf-8\" },\n", - " \"hashing\": { \"ngram\": 2, \"weight\": 1 }\n", + " \"format\": {\n", + " \"type\": \"string\",\n", + " \"encoding\": \"utf-8\"\n", + " },\n", + " \"hashing\": {\n", + " \"strategy\": {\n", + " \"bitsPerToken\": 30\n", + " },\n", + " \"hash\": {\n", + " \"type\": \"doubleHash\"\n", + " },\n", + " \"comparison\": {\n", + " \"type\": \"ngram\",\n", + " \"n\": 2,\n", + " \"positional\": false\n", + " }\n", + " }\n", " },\n", " {\n", " \"identifier\": \"suburb\",\n", - " \"format\": { \"type\": \"string\", \"encoding\": \"utf-8\" },\n", - " \"hashing\": { \"ngram\": 2, \"weight\": 1 }\n", + " \"format\": {\n", + " \"type\": \"string\",\n", + " \"encoding\": \"utf-8\"\n", + " },\n", + " \"hashing\": {\n", + " \"strategy\": {\n", + " \"bitsPerToken\": 30\n", + " },\n", + " \"hash\": {\n", + " \"type\": \"doubleHash\"\n", + " },\n", + " \"comparison\": {\n", + " \"type\": \"ngram\",\n", + " \"n\": 2,\n", + " \"positional\": false\n", + " }\n", + " }\n", " },\n", " {\n", " \"identifier\": \"postcode\",\n", - " \"format\": { \"type\": \"integer\", \"minimum\": 100, \"maximum\": 9999 },\n", - " \"hashing\": { \"ngram\": 1, \"positional\": true, \"weight\": 1 }\n", + " \"format\": {\n", + " \"type\": \"integer\",\n", + " \"minimum\": 100,\n", + " \"maximum\": 9999\n", + " },\n", + " \"hashing\": {\n", + " \"strategy\": {\n", + " \"bitsPerToken\": 30\n", + " },\n", + " \"hash\": {\n", + " \"type\": \"doubleHash\"\n", + " },\n", + " \"comparison\": {\n", + " \"type\": \"ngram\",\n", + " \"n\": 1,\n", + " \"positional\": true\n", + " }\n", + " }\n", " },\n", " {\n", " \"identifier\": \"state\",\n", - " \"format\": { \"type\": \"string\", \"encoding\": \"utf-8\", \"maxLength\": 3 },\n", - " \"hashing\": { \"ngram\": 2, \"weight\": 1 }\n", + " \"format\": {\n", + " \"type\": \"string\",\n", + " \"encoding\": \"utf-8\",\n", + " \"maxLength\": 3\n", + " },\n", + " \"hashing\": {\n", + " \"strategy\": {\n", + " \"bitsPerToken\": 30\n", + " },\n", + " \"hash\": {\n", + " \"type\": \"doubleHash\"\n", + " },\n", + " \"comparison\": {\n", + " \"type\": \"ngram\",\n", + " \"n\": 2,\n", + " \"positional\": false\n", + " }\n", + " }\n", " },\n", " {\n", " \"identifier\": \"date_of_birth\",\n", - " \"format\": { \"type\": \"integer\" },\n", - " \"hashing\": { \"ngram\": 1, \"positional\": true, \"weight\": 1, \"missingValue\": {\"sentinel\": \"\"} }\n", + " \"format\": {\n", + " \"type\": \"integer\"\n", + " },\n", + " \"hashing\": {\n", + " \"missingValue\": {\n", + " \"sentinel\": \"\"\n", + " },\n", + " \"strategy\": {\n", + " \"bitsPerToken\": 30\n", + " },\n", + " \"hash\": {\n", + " \"type\": \"doubleHash\"\n", + " },\n", + " \"comparison\": {\n", + " \"type\": \"ngram\",\n", + " \"n\": 1,\n", + " \"positional\": true\n", + " }\n", + " }\n", " },\n", " {\n", " \"identifier\": \"soc_sec_id\",\n", @@ -416,17 +554,17 @@ "name": "stdout", "output_type": "stream", "text": [ - "Credentials will be saved in /tmp/tmpricgd0mg\n", + "Credentials will be saved in /tmp/tmp8pi2emsl\n", "\u001b[31mProject created\u001b[0m\n" ] }, { "data": { "text/plain": [ - "{'project_id': 'edc59eacb29f8321b4fc70b43284110670cfd798da9f0835',\n", - " 'result_token': '99c57eaf488e56dd8a172dbccba891c850d32a9993caf14a',\n", - " 'update_tokens': ['3439a921f03e2d5399040f01ff17394c03a0b55efda350e2',\n", - " '50d034571d13b5de9bdccab09d88bd68c65a6b7ab036b71b']}" + "{'project_id': '500db47fcfed842b47f0ae20f6ba82a66dddc5d4d6e956a7',\n", + " 'result_token': '7c161ffe7873683fd8102a635815d7e7a577612458147c32',\n", + " 'update_tokens': ['1e50b588283e191f79769fc925949baded7c704bca28060d',\n", + " '3b95f9e2a51429738c3ea9338b2c3f05cda6cfcef0c8918c']}" ] }, "execution_count": 8, @@ -474,8 +612,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "\u001b[31mCLK data written to /tmp/tmpovptfm5h.json\u001b[0m\n", - "\u001b[31mCLK data written to /tmp/tmprqlatxwl.json\u001b[0m\n" + "\u001b[31mCLK data written to /tmp/tmp2_h66ds2.json\u001b[0m\n", + "\u001b[31mCLK data written to /tmp/tmpiyu3o3vv.json\u001b[0m\n" ] } ], diff --git a/docs/tutorial/data/schema.json b/docs/tutorial/data/schema.json index f1153148..fbeebea5 100644 --- a/docs/tutorial/data/schema.json +++ b/docs/tutorial/data/schema.json @@ -1,6 +1,5 @@ - { - "version": 2, + "version": 3, "clkConfig": { "l": 1024, "kdf": { @@ -23,10 +22,17 @@ "encoding": "utf-8" }, "hashing": { - "ngram": 2, - "positional": false, - "hash": {"type": "doubleHash"}, - "strategy": {"k": 15} + "hash": { + "type": "doubleHash" + }, + "strategy": { + "bitsPerToken": 15 + }, + "comparison": { + "type": "ngram", + "n": 2, + "positional": false + } } }, { @@ -36,10 +42,17 @@ "encoding": "utf-8" }, "hashing": { - "ngram": 2, - "positional": false, - "hash": {"type": "doubleHash"}, - "strategy": {"k": 15} + "hash": { + "type": "doubleHash" + }, + "strategy": { + "bitsPerToken": 15 + }, + "comparison": { + "type": "ngram", + "n": 2, + "positional": false + } } }, { @@ -49,10 +62,17 @@ "encoding": "utf-8" }, "hashing": { - "ngram": 2, - "positional": true, - "hash": {"type": "doubleHash"}, - "strategy": {"k": 15} + "hash": { + "type": "doubleHash" + }, + "strategy": { + "bitsPerToken": 15 + }, + "comparison": { + "type": "ngram", + "n": 2, + "positional": true + } } }, { @@ -62,10 +82,17 @@ "encoding": "utf-8" }, "hashing": { - "ngram": 1, - "positional": true, - "hash": {"type": "doubleHash"}, - "strategy": {"k": 15} + "hash": { + "type": "doubleHash" + }, + "strategy": { + "bitsPerToken": 15 + }, + "comparison": { + "type": "ngram", + "n": 1, + "positional": true + } } }, { @@ -75,10 +102,17 @@ "encoding": "utf-8" }, "hashing": { - "ngram": 2, - "positional": false, - "hash": {"type": "doubleHash"}, - "strategy": {"k": 15} + "hash": { + "type": "doubleHash" + }, + "strategy": { + "bitsPerToken": 15 + }, + "comparison": { + "type": "ngram", + "n": 2, + "positional": false + } } }, { @@ -88,10 +122,17 @@ "encoding": "utf-8" }, "hashing": { - "ngram": 1, - "positional": true, - "hash": {"type": "doubleHash"}, - "strategy": {"k": 7} + "hash": { + "type": "doubleHash" + }, + "strategy": { + "bitsPerToken": 7 + }, + "comparison": { + "type": "ngram", + "n": 1, + "positional": true + } } }, { @@ -101,11 +142,18 @@ "encoding": "utf-8" }, "hashing": { - "ngram": 1, - "positional": true, - "hash": {"type": "doubleHash"}, - "strategy": {"k": 7} + "hash": { + "type": "doubleHash" + }, + "strategy": { + "bitsPerToken": 7 + }, + "comparison": { + "type": "ngram", + "n": 1, + "positional": true + } } } ] -} +} \ No newline at end of file diff --git a/docs/tutorial/data/schema_ABC.json b/docs/tutorial/data/schema_ABC.json index 470e2bb4..612f6dcf 100644 --- a/docs/tutorial/data/schema_ABC.json +++ b/docs/tutorial/data/schema_ABC.json @@ -1,6 +1,5 @@ - { - "version": 2, + "version": 3, "clkConfig": { "l": 1024, "kdf": { @@ -23,9 +22,14 @@ "encoding": "utf-8" }, "hashing": { - "ngram": 2, - "positional": false, - "strategy": {"k": 15} + "strategy": { + "bitsPerToken": 15 + }, + "comparison": { + "type": "ngram", + "n": 2, + "positional": false + } } }, { @@ -35,9 +39,14 @@ "encoding": "utf-8" }, "hashing": { - "ngram": 2, - "positional": false, - "strategy": {"k": 15} + "strategy": { + "bitsPerToken": 15 + }, + "comparison": { + "type": "ngram", + "n": 2, + "positional": false + } } }, { @@ -47,9 +56,14 @@ "encoding": "utf-8" }, "hashing": { - "ngram": 2, - "positional": true, - "strategy": {"k": 15} + "strategy": { + "bitsPerToken": 15 + }, + "comparison": { + "type": "ngram", + "n": 2, + "positional": true + } } }, { @@ -59,9 +73,14 @@ "encoding": "utf-8" }, "hashing": { - "ngram": 1, - "positional": true, - "strategy": {"k": 8} + "strategy": { + "bitsPerToken": 8 + }, + "comparison": { + "type": "ngram", + "n": 1, + "positional": true + } } }, { @@ -69,4 +88,4 @@ "ignored": true } ] -} +} \ No newline at end of file diff --git a/docs/tutorial/multiparty-linkage-with-clkhash.ipynb b/docs/tutorial/multiparty-linkage-with-clkhash.ipynb index 2e77fea0..20ddd0ed 100644 --- a/docs/tutorial/multiparty-linkage-with-clkhash.ipynb +++ b/docs/tutorial/multiparty-linkage-with-clkhash.ipynb @@ -4,7 +4,9 @@ "cell_type": "code", "execution_count": 1, "metadata": { - "pycharm": {} + "pycharm": { + "is_executing": false + } }, "outputs": [], "source": [ @@ -18,7 +20,11 @@ { "cell_type": "code", "execution_count": 2, - "metadata": {}, + "metadata": { + "pycharm": { + "is_executing": false + } + }, "outputs": [], "source": [ "SECRET = 'my_secret'\n", @@ -57,15 +63,17 @@ "cell_type": "code", "execution_count": 3, "metadata": { - "pycharm": {} + "pycharm": { + "is_executing": false + } }, "outputs": [ { "name": "stdout", - "output_type": "stream", "text": [ "keys: my_secret\n" - ] + ], + "output_type": "stream" } ], "source": [ @@ -76,87 +84,17 @@ "cell_type": "code", "execution_count": 4, "metadata": { - "pycharm": {} + "pycharm": { + "is_executing": false + } }, "outputs": [ { "name": "stdout", - "output_type": "stream", "text": [ - "\n", - "{\n", - " \"version\": 2,\n", - " \"clkConfig\": {\n", - " \"l\": 1024,\n", - " \"kdf\": {\n", - " \"type\": \"HKDF\",\n", - " \"hash\": \"SHA256\",\n", - " \"salt\": \"SCbL2zHNnmsckfzchsNkZY9XoHk96P/G5nUBrM7ybymlEFsMV6PAeDZCNp3rfNUPCtLDMOGQHG4pCQpfhiHCyA==\",\n", - " \"info\": \"c2NoZW1hX2V4YW1wbGU=\",\n", - " \"keySize\": 64\n", - " }\n", - " },\n", - " \"features\": [\n", - " {\n", - " \"identifier\": \"id\",\n", - " \"ignored\": true\n", - " },\n", - " {\n", - " \"identifier\": \"givenname\",\n", - " \"format\": {\n", - " \"type\": \"string\",\n", - " \"encoding\": \"utf-8\"\n", - " },\n", - " \"hashing\": {\n", - " \"ngram\": 2,\n", - " \"positional\": false,\n", - " \"strategy\": {\"k\": 15}\n", - " }\n", - " },\n", - " {\n", - " \"identifier\": \"surname\",\n", - " \"format\": {\n", - " \"type\": \"string\",\n", - " \"encoding\": \"utf-8\"\n", - " },\n", - " \"hashing\": {\n", - " \"ngram\": 2,\n", - " \"positional\": false,\n", - " \"strategy\": {\"k\": 15}\n", - " }\n", - " },\n", - " {\n", - " \"identifier\": \"dob\",\n", - " \"format\": {\n", - " \"type\": \"string\",\n", - " \"encoding\": \"utf-8\"\n", - " },\n", - " \"hashing\": {\n", - " \"ngram\": 2,\n", - " \"positional\": true,\n", - " \"strategy\": {\"k\": 15}\n", - " }\n", - " },\n", - " {\n", - " \"identifier\": \"phone number\",\n", - " \"format\": {\n", - " \"type\": \"string\",\n", - " \"encoding\": \"utf-8\"\n", - " },\n", - " \"hashing\": {\n", - " \"ngram\": 1,\n", - " \"positional\": true,\n", - " \"strategy\": {\"k\": 8}\n", - " }\n", - " },\n", - " {\n", - " \"identifier\": \"ignoredForLinkage\",\n", - " \"ignored\": true\n", - " }\n", - " ]\n", - "}\n", - "\n" - ] + "{\n \"version\": 3,\n \"clkConfig\": {\n \"l\": 1024,\n \"kdf\": {\n \"type\": \"HKDF\",\n \"hash\": \"SHA256\",\n \"salt\": \"SCbL2zHNnmsckfzchsNkZY9XoHk96P/G5nUBrM7ybymlEFsMV6PAeDZCNp3rfNUPCtLDMOGQHG4pCQpfhiHCyA==\",\n \"info\": \"c2NoZW1hX2V4YW1wbGU=\",\n \"keySize\": 64\n }\n },\n \"features\": [\n {\n \"identifier\": \"id\",\n \"ignored\": true\n },\n {\n \"identifier\": \"givenname\",\n \"format\": {\n \"type\": \"string\",\n \"encoding\": \"utf-8\"\n },\n \"hashing\": {\n \"strategy\": {\n \"bitsPerToken\": 15\n },\n \"comparison\": {\n \"type\": \"ngram\",\n \"n\": 2,\n \"positional\": false\n }\n }\n },\n {\n \"identifier\": \"surname\",\n \"format\": {\n \"type\": \"string\",\n \"encoding\": \"utf-8\"\n },\n \"hashing\": {\n \"strategy\": {\n \"bitsPerToken\": 15\n },\n \"comparison\": {\n \"type\": \"ngram\",\n \"n\": 2,\n \"positional\": false\n }\n }\n },\n {\n \"identifier\": \"dob\",\n \"format\": {\n \"type\": \"string\",\n \"encoding\": \"utf-8\"\n },\n \"hashing\": {\n \"strategy\": {\n \"bitsPerToken\": 15\n },\n \"comparison\": {\n \"type\": \"ngram\",\n \"n\": 2,\n \"positional\": true\n }\n }\n },\n {\n \"identifier\": \"phone number\",\n \"format\": {\n \"type\": \"string\",\n \"encoding\": \"utf-8\"\n },\n \"hashing\": {\n \"strategy\": {\n \"bitsPerToken\": 8\n },\n \"comparison\": {\n \"type\": \"ngram\",\n \"n\": 1,\n \"positional\": true\n }\n }\n },\n {\n \"identifier\": \"ignoredForLinkage\",\n \"ignored\": true\n }\n ]\n}\n" + ], + "output_type": "stream" } ], "source": [ @@ -179,100 +117,19 @@ "cell_type": "code", "execution_count": 5, "metadata": { - "pycharm": {} + "pycharm": { + "is_executing": false + } }, "outputs": [ { "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idgivennamesurnamedobphone numbergender
00tarahilton27-08-194108 2210 0298male
13saJivernre22-12-297202 1090 1906mals
27sliverpaciorekNaNNaNmals
39rubygeorge09-05-193907 4698 6255male
410eyrinmcampbell29-1q-198308 299y 1535male
\n", - "
" - ], - "text/plain": [ - " id givenname surname dob phone number gender\n", - "0 0 tara hilton 27-08-1941 08 2210 0298 male\n", - "1 3 saJi vernre 22-12-2972 02 1090 1906 mals\n", - "2 7 sliver paciorek NaN NaN mals\n", - "3 9 ruby george 09-05-1939 07 4698 6255 male\n", - "4 10 eyrinm campbell 29-1q-1983 08 299y 1535 male" - ] + "text/plain": " id givenname surname dob phone number gender\n0 0 tara hilton 27-08-1941 08 2210 0298 male\n1 3 saJi vernre 22-12-2972 02 1090 1906 mals\n2 7 sliver paciorek NaN NaN mals\n3 9 ruby george 09-05-1939 07 4698 6255 male\n4 10 eyrinm campbell 29-1q-1983 08 299y 1535 male", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
idgivennamesurnamedobphone numbergender
00tarahilton27-08-194108 2210 0298male
13saJivernre22-12-297202 1090 1906mals
27sliverpaciorekNaNNaNmals
39rubygeorge09-05-193907 4698 6255male
410eyrinmcampbell29-1q-198308 299y 1535male
\n
" }, - "execution_count": 5, "metadata": {}, - "output_type": "execute_result" + "output_type": "execute_result", + "execution_count": 5 } ], "source": [ @@ -292,100 +149,19 @@ "cell_type": "code", "execution_count": 6, "metadata": { - "pycharm": {} + "pycharm": { + "is_executing": false + } }, "outputs": [ { "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idgivennamesurnamedobphone numbercity
03zaliverner22-12-197202 1090 1906perth
14samueltremellen21-12-192303 3605 9336melbourne
25amylodge16-01-195807 8286 9372canberra
37oIjipacioerk10-02-195904 4220 5949sydney
410erinkampgell29-12-198308 2996 1445perth
\n", - "
" - ], - "text/plain": [ - " id givenname surname dob phone number city\n", - "0 3 zali verner 22-12-1972 02 1090 1906 perth\n", - "1 4 samuel tremellen 21-12-1923 03 3605 9336 melbourne\n", - "2 5 amy lodge 16-01-1958 07 8286 9372 canberra\n", - "3 7 oIji pacioerk 10-02-1959 04 4220 5949 sydney\n", - "4 10 erin kampgell 29-12-1983 08 2996 1445 perth" - ] + "text/plain": " id givenname surname dob phone number city\n0 3 zali verner 22-12-1972 02 1090 1906 perth\n1 4 samuel tremellen 21-12-1923 03 3605 9336 melbourne\n2 5 amy lodge 16-01-1958 07 8286 9372 canberra\n3 7 oIji pacioerk 10-02-1959 04 4220 5949 sydney\n4 10 erin kampgell 29-12-1983 08 2996 1445 perth", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
idgivennamesurnamedobphone numbercity
03zaliverner22-12-197202 1090 1906perth
14samueltremellen21-12-192303 3605 9336melbourne
25amylodge16-01-195807 8286 9372canberra
37oIjipacioerk10-02-195904 4220 5949sydney
410erinkampgell29-12-198308 2996 1445perth
\n
" }, - "execution_count": 6, "metadata": {}, - "output_type": "execute_result" + "output_type": "execute_result", + "execution_count": 6 } ], "source": [ @@ -405,100 +181,19 @@ "cell_type": "code", "execution_count": 7, "metadata": { - "pycharm": {} + "pycharm": { + "is_executing": false + } }, "outputs": [ { "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idgivennamesurnamedobphone numberincome
01joshuaarkwright16-02-190304 8511 958070189.446
13zal:verner22-12-197202 1090 190650194.118
27oliyerpaciorwk10-02-195904 4210 594931750.993
38nacoyaranson17-08-192507 6033 4580102446.131
410erihcampbell29-12-1i8308 299t 1435331476.599
\n", - "
" - ], - "text/plain": [ - " id givenname surname dob phone number income\n", - "0 1 joshua arkwright 16-02-1903 04 8511 9580 70189.446\n", - "1 3 zal: verner 22-12-1972 02 1090 1906 50194.118\n", - "2 7 oliyer paciorwk 10-02-1959 04 4210 5949 31750.993\n", - "3 8 nacoya ranson 17-08-1925 07 6033 4580 102446.131\n", - "4 10 erih campbell 29-12-1i83 08 299t 1435 331476.599" - ] + "text/plain": " id givenname surname dob phone number income\n0 1 joshua arkwright 16-02-1903 04 8511 9580 70189.446\n1 3 zal: verner 22-12-1972 02 1090 1906 50194.118\n2 7 oliyer paciorwk 10-02-1959 04 4210 5949 31750.993\n3 8 nacoya ranson 17-08-1925 07 6033 4580 102446.131\n4 10 erih campbell 29-12-1i83 08 299t 1435 331476.599", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
idgivennamesurnamedobphone numberincome
01joshuaarkwright16-02-190304 8511 958070189.446
13zal:verner22-12-197202 1090 190650194.118
27oliyerpaciorwk10-02-195904 4210 594931750.993
38nacoyaranson17-08-192507 6033 4580102446.131
410erihcampbell29-12-1i8308 299t 1435331476.599
\n
" }, - "execution_count": 7, "metadata": {}, - "output_type": "execute_result" + "output_type": "execute_result", + "execution_count": 7 } ], "source": [ @@ -520,15 +215,17 @@ "cell_type": "code", "execution_count": 8, "metadata": { - "pycharm": {} + "pycharm": { + "is_executing": false + } }, "outputs": [ { "name": "stdout", - "output_type": "stream", "text": [ "\u001b[31mProject created\u001b[0m\r\n" - ] + ], + "output_type": "stream" } ], "source": [ @@ -557,15 +254,17 @@ "cell_type": "code", "execution_count": 9, "metadata": { - "pycharm": {} + "pycharm": { + "is_executing": false + } }, "outputs": [ { "name": "stdout", - "output_type": "stream", "text": [ "\u001b[31mCLK data written to dataset-alice-hashed.json\u001b[0m\r\n" - ] + ], + "output_type": "stream" } ], "source": [ @@ -576,15 +275,17 @@ "cell_type": "code", "execution_count": 10, "metadata": { - "pycharm": {} + "pycharm": { + "is_executing": false + } }, "outputs": [ { "name": "stdout", - "output_type": "stream", "text": [ - "{\"message\": \"Updated\", \"receipt_token\": \"e3c9aa8e058d268258c82e9dd604292a3dcc45189a7776a7\"}" - ] + "{\"message\": \"Updated\", \"receipt_token\": \"372a1a7f5cdc639ec3dfb98475573bb796212149e50a5116\"}" + ], + "output_type": "stream" } ], "source": [ @@ -604,15 +305,17 @@ "cell_type": "code", "execution_count": 11, "metadata": { - "pycharm": {} + "pycharm": { + "is_executing": false + } }, "outputs": [ { "name": "stdout", - "output_type": "stream", "text": [ "\u001b[31mCLK data written to dataset-bob-hashed.json\u001b[0m\r\n" - ] + ], + "output_type": "stream" } ], "source": [ @@ -623,15 +326,17 @@ "cell_type": "code", "execution_count": 12, "metadata": { - "pycharm": {} + "pycharm": { + "is_executing": false + } }, "outputs": [ { "name": "stdout", - "output_type": "stream", "text": [ - "{\"message\": \"Updated\", \"receipt_token\": \"2d0c17c4afd5160ca6d0f47d813d13528da7ff8844eb739d\"}" - ] + "{\"message\": \"Updated\", \"receipt_token\": \"85126409e52f61cdaa5f761a28644707bd17fbcf17bb1e4d\"}" + ], + "output_type": "stream" } ], "source": [ @@ -651,15 +356,17 @@ "cell_type": "code", "execution_count": 13, "metadata": { - "pycharm": {} + "pycharm": { + "is_executing": false + } }, "outputs": [ { "name": "stdout", - "output_type": "stream", "text": [ "\u001b[31mCLK data written to dataset-charlie-hashed.json\u001b[0m\r\n" - ] + ], + "output_type": "stream" } ], "source": [ @@ -670,15 +377,17 @@ "cell_type": "code", "execution_count": 14, "metadata": { - "pycharm": {} + "pycharm": { + "is_executing": false + } }, "outputs": [ { "name": "stdout", - "output_type": "stream", "text": [ - "{\"message\": \"Updated\", \"receipt_token\": \"31f615a0c69dcdada77362dfc471ca74c9c025eeb1e942b3\"}" - ] + "{\"message\": \"Updated\", \"receipt_token\": \"bc348c187f2f3fe0e179bd1ffcfa96ced642dabced79723a\"}" + ], + "output_type": "stream" } ], "source": [ @@ -700,7 +409,9 @@ "cell_type": "code", "execution_count": 15, "metadata": { - "pycharm": {} + "pycharm": { + "is_executing": false + } }, "outputs": [], "source": [ @@ -724,22 +435,19 @@ "cell_type": "code", "execution_count": 16, "metadata": { - "pycharm": {} + "pycharm": { + "is_executing": false + } }, "outputs": [ { "name": "stdout", - "output_type": "stream", "text": [ - "\u001b[31mState: completed\n", - "Stage (3/3): compute output\u001b[0m\n", - "\u001b[31mState: completed\n", - "Stage (3/3): compute output\u001b[0m\n", - "\u001b[31mState: completed\n", - "Stage (3/3): compute output\u001b[0m\n", - "\u001b[31mDownloading result\u001b[0m\n", - "\u001b[31mReceived result\u001b[0m\n" - ] + "\u001b[31mState: completed\r\nStage (3/3): compute output\u001b[0m\r\n", + "\u001b[31mState: completed\r\nStage (3/3): compute output\u001b[0m\r\n\u001b[31mState: completed\r\nStage (3/3): compute output\u001b[0m\r\n\u001b[31mDownloading result\u001b[0m\r\n", + "\u001b[31mReceived result\u001b[0m\r\n" + ], + "output_type": "stream" } ], "source": [ @@ -750,7 +458,9 @@ "cell_type": "code", "execution_count": 17, "metadata": { - "pycharm": {} + "pycharm": { + "is_executing": false + } }, "outputs": [], "source": [ @@ -774,7 +484,9 @@ "cell_type": "code", "execution_count": 18, "metadata": { - "pycharm": {} + "pycharm": { + "is_executing": false + } }, "outputs": [], "source": [ @@ -798,117 +510,19 @@ "cell_type": "code", "execution_count": 19, "metadata": { - "pycharm": {} + "pycharm": { + "is_executing": false + } }, "outputs": [ { "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
gendercityincome
0msl5sydnev
1malemelbourne
2malecanbrrra
3femalr277039.294
4perth125343.406
5mlebourne56899.522
6malecanberra
7femalecan1>erra
8maleacbbeera81191.584
9maoemesllootrne
\n", - "
" - ], - "text/plain": [ - " gender city income\n", - "0 msl5 sydnev \n", - "1 male melbourne \n", - "2 male canbrrra \n", - "3 femalr 277039.294\n", - "4 perth 125343.406\n", - "5 mlebourne 56899.522\n", - "6 male canberra \n", - "7 female can1>erra \n", - "8 male acbbeera 81191.584\n", - "9 maoe mesllootrne " - ] + "text/plain": " gender city income\n0 male sydney \n1 male canbrrra \n2 femake sydn4v \n3 pertb 21407e.192\n4 femake sydriey \n5 mlebourne 56899.522\n6 male canberra \n7 female 44652.704\n8 male sydnely \n9 male 65381.450", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
gendercityincome
0malesydney
1malecanbrrra
2femakesydn4v
3pertb21407e.192
4femakesydriey
5mlebourne56899.522
6malecanberra
7female44652.704
8malesydnely
9male65381.450
\n
" }, - "execution_count": 19, "metadata": {}, - "output_type": "execute_result" + "output_type": "execute_result", + "execution_count": 19 } ], "source": [ @@ -933,33 +547,19 @@ "cell_type": "code", "execution_count": 20, "metadata": { - "pycharm": {}, + "pycharm": { + "is_executing": false + }, "scrolled": true }, "outputs": [ { "data": { - "text/plain": [ - "[[[0, 2196], [2, 2203]],\n", - " [[1, 399], [2, 401], [0, 414]],\n", - " [[1, 772], [2, 811], [0, 795]],\n", - " [[0, 2868], [2, 2886]],\n", - " [[2, 2335], [0, 948]],\n", - " [[0, 1900], [1, 1866]],\n", - " [[0, 2482], [1, 2494], [2, 2509]],\n", - " [[0, 88], [2, 95], [1, 86]],\n", - " [[0, 1740], [1, 1693], [2, 1736]],\n", - " [[0, 536], [2, 525], [1, 512]],\n", - " [[0, 2489], [1, 2501], [2, 2516]],\n", - " [[0, 1176], [1, 1121]],\n", - " [[1, 1393], [2, 1421], [0, 1451]],\n", - " [[0, 658], [2, 666], [1, 645]],\n", - " [[0, 2317], [2, 2324], [1, 2311]]]" - ] + "text/plain": "[[[1, 2065], [0, 2428]],\n [[0, 1740], [1, 1693], [2, 1736]],\n [[1, 2224], [2, 2236]],\n [[0, 565], [1, 557], [2, 564]],\n [[0, 1980], [1, 1953]],\n [[0, 536], [2, 525], [1, 512]],\n [[1, 171], [2, 175], [0, 169]],\n [[0, 2234], [1, 2228], [2, 2242]],\n [[0, 918], [2, 2840]],\n [[0, 2461], [2, 2479], [1, 2468]],\n [[0, 2451], [2, 2471], [1, 2458]],\n [[0, 230], [1, 232]],\n [[0, 2765], [2, 2794], [1, 2789]],\n [[0, 1758], [2, 1754], [1, 1712]],\n [[1, 351], [2, 356]]]" }, - "execution_count": 20, "metadata": {}, - "output_type": "execute_result" + "output_type": "execute_result", + "execution_count": 20 } ], "source": [ @@ -981,7 +581,9 @@ "cell_type": "code", "execution_count": 21, "metadata": { - "pycharm": {} + "pycharm": { + "is_executing": false + } }, "outputs": [], "source": [ @@ -1005,200 +607,20 @@ "cell_type": "code", "execution_count": 22, "metadata": { - "pycharm": {} + "pycharm": { + "is_executing": false, + "name": "#%%\n" + } }, "outputs": [ { "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idgiven namesurnamedobphone numbernon-linking
64504201cazsandralock20-19-190408 9282 1556femaoe
64514201cassandralock20-10-190408 9282 1556brisbane
6452
64534914runybriten01-01-197103 7339 6523malw
64544914rubybritten01-01-197103 7338 6523melbourne
64554914rubybr'ltten01-01-197103 7337 652359544.220
6456
64572802brettmaynsrd04-09-196803 3038 1109msle
64582802br4ttmaynard04-09-186703 3038 2109sydney
64592802brettmaynard04-09-196703 3038 1109152975.865
6460
64617378bradleycampbekl15-07-200903 2052 7063malw
64627378bratcampbell15-07-200903 2062 7063cnaberfa
64637378bradelycampbiol15-07-200903 2052 706364924.120
6464
\n", - "
" - ], - "text/plain": [ - " id given name surname dob phone number non-linking\n", - "6450 4201 cazsandra lock 20-19-1904 08 9282 1556 femaoe\n", - "6451 4201 cassandra lock 20-10-1904 08 9282 1556 brisbane\n", - "6452 \n", - "6453 4914 runy briten 01-01-1971 03 7339 6523 malw\n", - "6454 4914 ruby britten 01-01-1971 03 7338 6523 melbourne\n", - "6455 4914 ruby br'ltten 01-01-1971 03 7337 6523 59544.220\n", - "6456 \n", - "6457 2802 brett maynsrd 04-09-1968 03 3038 1109 msle\n", - "6458 2802 br4tt maynard 04-09-1867 03 3038 2109 sydney\n", - "6459 2802 brett maynard 04-09-1967 03 3038 1109 152975.865\n", - "6460 \n", - "6461 7378 bradley campbekl 15-07-2009 03 2052 7063 malw\n", - "6462 7378 brat campbell 15-07-2009 03 2062 7063 cnaberfa\n", - "6463 7378 bradely campbiol 15-07-2009 03 2052 7063 64924.120\n", - "6464 " - ] + "text/plain": " id given name surname dob phone number non-linking\n6450 \n6451 1522 poahtia torpe 22-09-1999 07 6482 4546 femalr\n6452 1522 portia thorpe 22-09-1999 07 6482 4546 canberra\n6453 \n6454 8662 luct pulfort 05-03-1903 02 0726 9479 male\n6455 8662 lucy pulford 05-03-1903 melbourrie\n6456 8662 lusy pulford 05-03-1993 02 0726 0489 192230.309\n6457 \n6458 5797 chelsie pajc0ek 27-03-1961 07 3258 9992 male\n6459 5797 chel5i padci4 27-04-1961 07 3258 0991 sydney\n6460 5797 chelsie pasl\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
idgiven namesurnamedobphone numbernon-linking
6450
64511522poahtiatorpe22-09-199907 6482 4546femalr
64521522portiathorpe22-09-199907 6482 4546canberra
6453
64548662luctpulfort05-03-190302 0726 9479male
64558662lucypulford05-03-1903melbourrie
64568662lusypulford05-03-199302 0726 0489192230.309
6457
64585797chelsiepajc0ek27-03-196107 3258 9992male
64595797chel5ipadci427-04-196107 3258 0991sydney
64605797chelsiepasl<oe27-94-196107 3258 089262334.690
6461
64621885nicholasrobson06-01-191402 7799 6803canberra
64631885nicho|asrobson06-91-191402 7799 680361333.218
6464
\n" }, - "execution_count": 22, "metadata": {}, - "output_type": "execute_result" + "output_type": "execute_result", + "execution_count": 22 } ], "source": [ @@ -1208,7 +630,8 @@ " table.append([dataset_alice, dataset_bob, dataset_charlie][i][j])\n", " table.append([''] * 6)\n", " \n", - "pd.DataFrame(table, columns=['id', 'given name', 'surname', 'dob', 'phone number', 'non-linking']).tail(15)" + "pd.DataFrame(table, columns=['id', 'given name', 'surname', 'dob', 'phone number', 'non-linking']).tail(15)\n", + "\n" ] } ], @@ -1229,8 +652,17 @@ "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.3" + }, + "pycharm": { + "stem_cell": { + "cell_type": "raw", + "source": [], + "metadata": { + "collapsed": false + } + } } }, "nbformat": 4, "nbformat_minor": 4 -} +} \ No newline at end of file