diff --git a/backend/requirements.txt b/backend/requirements.txt index d0d51014..42e464eb 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -1,7 +1,7 @@ anonlink==0.12.5 bitmath==1.3.1.2 celery==4.3.0 -clkhash==0.14.0 +clkhash==0.15.0 colorama==0.4.1 # required for structlog connexion==1.4 Flask-Opentracing==0.2.0 diff --git a/docs/tutorial/Permutations.ipynb b/docs/tutorial/Permutations.ipynb index 7e3690d9..e30adabc 100644 --- a/docs/tutorial/Permutations.ipynb +++ b/docs/tutorial/Permutations.ipynb @@ -82,7 +82,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{\"project_count\": 1021, \"rate\": 2453247, \"status\": \"ok\"}\n" + "{\"project_count\": 6534, \"rate\": 2504556, \"status\": \"ok\"}\r\n" ] } ], @@ -294,26 +294,23 @@ "name": "stdout", "output_type": "stream", "text": [ - "Overwriting /tmp/tmptfalxkiq\n" + "Overwriting /tmp/tmptm0w938k\n" ] } ], "source": [ "%%writefile {schema.name}\n", "{\n", - " \"version\": 1,\n", + " \"version\": 3,\n", " \"clkConfig\": {\n", " \"l\": 1024,\n", - " \"k\": 30,\n", - " \"hash\": {\n", - " \"type\": \"doubleHash\"\n", - " },\n", + " \"xor_folds\": 0,\n", " \"kdf\": {\n", " \"type\": \"HKDF\",\n", " \"hash\": \"SHA256\",\n", - " \"info\": \"c2NoZW1hX2V4YW1wbGU=\",\n", - " \"salt\": \"SCbL2zHNnmsckfzchsNkZY9XoHk96P/G5nUBrM7ybymlEFsMV6PAeDZCNp3rfNUPCtLDMOGQHG4pCQpfhiHCyA==\",\n", - " \"keySize\": 64\n", + " \"info\": \"c2NoZW1hX2V4YW1wbGU=\",\n", + " \"salt\": \"SCbL2zHNnmsckfzchsNkZY9XoHk96P/G5nUBrM7ybymlEFsMV6PAeDZCNp3rfNUPCtLDMOGQHG4pCQpfhiHCyA==\",\n", + " \"keySize\": 64\n", " }\n", " },\n", " \"features\": [\n", @@ -323,48 +320,189 @@ " },\n", " {\n", " \"identifier\": \"given_name\",\n", - " \"format\": { \"type\": \"string\", \"encoding\": \"utf-8\" },\n", - " \"hashing\": { \"ngram\": 2, \"weight\": 1 }\n", + " \"format\": {\n", + " \"type\": \"string\",\n", + " \"encoding\": \"utf-8\"\n", + " },\n", + " \"hashing\": {\n", + " \"strategy\": {\n", + " \"bitsPerToken\": 30\n", + " },\n", + " \"hash\": {\n", + " \"type\": \"doubleHash\"\n", + " },\n", + " \"comparison\": {\n", + " \"type\": \"ngram\",\n", + " \"n\": 2,\n", + " \"positional\": false\n", + " }\n", + " }\n", " },\n", " {\n", " \"identifier\": \"surname\",\n", - " \"format\": { \"type\": \"string\", \"encoding\": \"utf-8\" },\n", - " \"hashing\": { \"ngram\": 2, \"weight\": 1 }\n", + " \"format\": {\n", + " \"type\": \"string\",\n", + " \"encoding\": \"utf-8\"\n", + " },\n", + " \"hashing\": {\n", + " \"strategy\": {\n", + " \"bitsPerToken\": 30\n", + " },\n", + " \"hash\": {\n", + " \"type\": \"doubleHash\"\n", + " },\n", + " \"comparison\": {\n", + " \"type\": \"ngram\",\n", + " \"n\": 2,\n", + " \"positional\": false\n", + " }\n", + " }\n", " },\n", " {\n", " \"identifier\": \"street_number\",\n", - " \"format\": { \"type\": \"integer\" },\n", - " \"hashing\": { \"ngram\": 1, \"positional\": true, \"weight\": 0.5, \"missingValue\": {\"sentinel\": \"\"} }\n", + " \"format\": {\n", + " \"type\": \"integer\"\n", + " },\n", + " \"hashing\": {\n", + " \"missingValue\": {\n", + " \"sentinel\": \"\"\n", + " },\n", + " \"strategy\": {\n", + " \"bitsPerToken\": 15\n", + " },\n", + " \"hash\": {\n", + " \"type\": \"doubleHash\"\n", + " },\n", + " \"comparison\": {\n", + " \"type\": \"ngram\",\n", + " \"n\": 1,\n", + " \"positional\": true\n", + " }\n", + " }\n", " },\n", " {\n", " \"identifier\": \"address_1\",\n", - " \"format\": { \"type\": \"string\", \"encoding\": \"utf-8\" },\n", - " \"hashing\": { \"ngram\": 2, \"weight\": 0.5 }\n", + " \"format\": {\n", + " \"type\": \"string\",\n", + " \"encoding\": \"utf-8\"\n", + " },\n", + " \"hashing\": {\n", + " \"strategy\": {\n", + " \"bitsPerToken\": 15\n", + " },\n", + " \"hash\": {\n", + " \"type\": \"doubleHash\"\n", + " },\n", + " \"comparison\": {\n", + " \"type\": \"ngram\",\n", + " \"n\": 2,\n", + " \"positional\": false\n", + " }\n", + " }\n", " },\n", " {\n", " \"identifier\": \"address_2\",\n", - " \"format\": { \"type\": \"string\", \"encoding\": \"utf-8\" },\n", - " \"hashing\": { \"ngram\": 2, \"weight\": 0.5 }\n", + " \"format\": {\n", + " \"type\": \"string\",\n", + " \"encoding\": \"utf-8\"\n", + " },\n", + " \"hashing\": {\n", + " \"strategy\": {\n", + " \"bitsPerToken\": 15\n", + " },\n", + " \"hash\": {\n", + " \"type\": \"doubleHash\"\n", + " },\n", + " \"comparison\": {\n", + " \"type\": \"ngram\",\n", + " \"n\": 2,\n", + " \"positional\": false\n", + " }\n", + " }\n", " },\n", " {\n", " \"identifier\": \"suburb\",\n", - " \"format\": { \"type\": \"string\", \"encoding\": \"utf-8\" },\n", - " \"hashing\": { \"ngram\": 2, \"weight\": 0.5 }\n", + " \"format\": {\n", + " \"type\": \"string\",\n", + " \"encoding\": \"utf-8\"\n", + " },\n", + " \"hashing\": {\n", + " \"strategy\": {\n", + " \"bitsPerToken\": 15\n", + " },\n", + " \"hash\": {\n", + " \"type\": \"doubleHash\"\n", + " },\n", + " \"comparison\": {\n", + " \"type\": \"ngram\",\n", + " \"n\": 2,\n", + " \"positional\": false\n", + " }\n", + " }\n", " },\n", " {\n", " \"identifier\": \"postcode\",\n", - " \"format\": { \"type\": \"integer\", \"minimum\": 100, \"maximum\": 9999 },\n", - " \"hashing\": { \"ngram\": 1, \"positional\": true, \"weight\": 0.5 }\n", + " \"format\": {\n", + " \"type\": \"integer\",\n", + " \"minimum\": 100,\n", + " \"maximum\": 9999\n", + " },\n", + " \"hashing\": {\n", + " \"strategy\": {\n", + " \"bitsPerToken\": 15\n", + " },\n", + " \"hash\": {\n", + " \"type\": \"doubleHash\"\n", + " },\n", + " \"comparison\": {\n", + " \"type\": \"ngram\",\n", + " \"n\": 1,\n", + " \"positional\": true\n", + " }\n", + " }\n", " },\n", " {\n", " \"identifier\": \"state\",\n", - " \"format\": { \"type\": \"string\", \"encoding\": \"utf-8\", \"maxLength\": 3 },\n", - " \"hashing\": { \"ngram\": 2, \"weight\": 1 }\n", + " \"format\": {\n", + " \"type\": \"string\",\n", + " \"encoding\": \"utf-8\",\n", + " \"maxLength\": 3\n", + " },\n", + " \"hashing\": {\n", + " \"strategy\": {\n", + " \"bitsPerToken\": 30\n", + " },\n", + " \"hash\": {\n", + " \"type\": \"doubleHash\"\n", + " },\n", + " \"comparison\": {\n", + " \"type\": \"ngram\",\n", + " \"n\": 2,\n", + " \"positional\": false\n", + " }\n", + " }\n", " },\n", " {\n", " \"identifier\": \"date_of_birth\",\n", - " \"format\": { \"type\": \"integer\" },\n", - " \"hashing\": { \"ngram\": 1, \"positional\": true, \"weight\": 1, \"missingValue\": {\"sentinel\": \"\"} }\n", + " \"format\": {\n", + " \"type\": \"integer\"\n", + " },\n", + " \"hashing\": {\n", + " \"missingValue\": {\n", + " \"sentinel\": \"\"\n", + " },\n", + " \"strategy\": {\n", + " \"bitsPerToken\": 30\n", + " },\n", + " \"hash\": {\n", + " \"type\": \"doubleHash\"\n", + " },\n", + " \"comparison\": {\n", + " \"type\": \"ngram\",\n", + " \"n\": 1,\n", + " \"positional\": true\n", + " }\n", + " }\n", " },\n", " {\n", " \"identifier\": \"soc_sec_id\",\n", @@ -399,17 +537,17 @@ "name": "stdout", "output_type": "stream", "text": [ - "Credentials will be saved in /tmp/tmpyr8dc2pf\n", + "Credentials will be saved in /tmp/tmptneh9xy1\n", "\u001b[31mProject created\u001b[0m\n" ] }, { "data": { "text/plain": [ - "{'project_id': 'b8211d1450c8d0d631dbdc1fb482af106b8cbdebed5b7fd3',\n", - " 'result_token': '8fe1fc01f7ac3a3406d1e031b7d120800aa6460d0da62abb',\n", - " 'update_tokens': ['1c39c6972626bd34729812f0b9cf6e467461824dbbd0682c',\n", - " '901c12061cf621b67df5b9de2719b8806636364d3fdc1765']}" + "{'project_id': '12256e29a8ad92c9016ba3e7650888f13d3bfb3bd23cc98a',\n", + " 'result_token': '1a588d384f651e9430ac1bb42196f9fe393ff10e8ec65f48',\n", + " 'update_tokens': ['6111c582a0d6a649480c719adcd258b811da17887849ee00',\n", + " '4239370ce8868a9eb3dc85a85eca243bf593a0cc637a5be8']}" ] }, "execution_count": 7, @@ -446,7 +584,7 @@ "At the moment both data providers have *raw* personally identiy information. We first have to generate CLKs from the raw entity information. We need:\n", "- the *clkhash* library\n", "- the linkage schema from above\n", - "- and two secret passwords which are only known to Alice and Bob. (here: `horse` and `staple`)\n", + "- and a secret which is only known to Alice and Bob. (here: `my_secret`)\n", "\n", "Please see [clkhash](https://clkhash.readthedocs.io/) documentation for further details on this." ] @@ -464,16 +602,14 @@ "name": "stdout", "output_type": "stream", "text": [ - "generating CLKs: 100%|█| 5.00k/5.00k [00:01<00:00, 1.32kclk/s, mean=765, std=37.1]\n", - "\u001b[31mCLK data written to /tmp/tmpc_4k553j.json\u001b[0m\n", - "generating CLKs: 100%|█| 5.00k/5.00k [00:01<00:00, 4.28kclk/s, mean=756, std=43.3]\n", - "\u001b[31mCLK data written to /tmp/tmpv7eo2tfp.json\u001b[0m\n" + "\u001b[31mCLK data written to /tmp/tmp9vdauwh4.json\u001b[0m\n", + "\u001b[31mCLK data written to /tmp/tmpgspffags.json\u001b[0m\n" ] } ], "source": [ - "!clkutil hash \"{a_csv.name}\" horse staple \"{schema.name}\" \"{a_clks.name}\"\n", - "!clkutil hash \"{b_csv.name}\" horse staple \"{schema.name}\" \"{b_clks.name}\"" + "!clkutil hash \"{a_csv.name}\" my_secret \"{schema.name}\" \"{a_clks.name}\"\n", + "!clkutil hash \"{b_csv.name}\" my_secret \"{schema.name}\" \"{b_clks.name}\"" ] }, { @@ -498,22 +634,33 @@ "name": "stdout", "output_type": "stream", "text": [ - "Usage: clkutil upload [OPTIONS] CLK_JSON\n", - "\n", - " Upload CLK data to entity matching server.\n", - "\n", - " Given a json file containing hashed clk data as CLK_JSON, upload to the\n", - " entity resolution service.\n", - "\n", - " Use \"-\" to read from stdin.\n", - "\n", - "Options:\n", - " --project TEXT Project identifier\n", - " --apikey TEXT Authentication API key for the server.\n", - " --server TEXT Server address including protocol\n", - " -o, --output FILENAME\n", - " -v, --verbose Script is more talkative\n", - " --help Show this message and exit.\n" + "Usage: clkutil upload [OPTIONS] CLK_JSON\r\n", + "\r\n", + " Upload CLK data to entity matching server.\r\n", + "\r\n", + " Given a json file containing hashed clk data as CLK_JSON, upload to the\r\n", + " entity resolution service.\r\n", + "\r\n", + " Use \"-\" to read from stdin.\r\n", + "\r\n", + "Options:\r\n", + " --project TEXT Project identifier\r\n", + " --apikey TEXT Authentication API key for the server.\r\n", + " -o, --output FILENAME\r\n", + " --server TEXT Server address including protocol. Default\r\n", + " https://testing.es.data61.xyz.\r\n", + " --retry-multiplier INTEGER If receives a 503 from\r\n", + " server, minimum waiting time before\r\n", + " retrying. Default 100.\r\n", + " --retry-exponential-max INTEGER\r\n", + " If receives a 503 from\r\n", + " server, maximum time interval between\r\n", + " retries. Default 10000.\r\n", + " --retry-max-time INTEGER If receives a 503 from\r\n", + " server, retry only within this period.\r\n", + " Default 20000.\r\n", + " -v, --verbose Script is more talkative\r\n", + " --help Show this message and exit.\r\n" ] } ], @@ -653,7 +800,8 @@ "outputs": [], "source": [ "import requests\n", - "import clkhash.rest_client\n", + "from clkhash.rest_client import RestClient\n", + "from clkhash.rest_client import format_run_status\n", "\n", "from IPython.display import clear_output" ] @@ -677,9 +825,10 @@ } ], "source": [ - "for update in clkhash.rest_client.watch_run_status(url, project_id, run_id, credentials['result_token'], timeout=300):\n", + "rest_client = RestClient(url)\n", + "for update in rest_client.watch_run_status(project_id, run_id, credentials['result_token'], timeout=300):\n", " clear_output(wait=True)\n", - " print(clkhash.rest_client.format_run_status(update))" + " print(format_run_status(update))" ] }, { @@ -759,7 +908,7 @@ { "data": { "text/plain": [ - "4858" + "4851" ] }, "execution_count": 18, @@ -815,7 +964,7 @@ { "data": { "text/plain": [ - "[2333, 1468, 559, 274, 653, 3385, 278, 3568, 3617, 4356]" + "[2418, 3590, 2340, 1226, 1323, 251, 4696, 2598, 4019, 301]" ] }, "execution_count": 20, @@ -849,7 +998,7 @@ { "data": { "text/plain": [ - "[2083, 1106, 3154, 1180, 2582, 375, 3533, 1046, 316, 2427]" + "[3183, 4293, 3406, 2808, 4528, 2446, 4606, 1601, 1641, 2062]" ] }, "execution_count": 21, @@ -923,16 +1072,16 @@ { "data": { "text/plain": [ - "['rec-2689-org,ainsley,robison,23,atherton street,villa 1/4,deer park,3418,nsw,19310531,4102867\\n',\n", - " 'rec-1056-org,chloe,imgraben,47,curlewis crescent,dragon rising,burleigh waters,2680,qld,19520516,6111417\\n',\n", - " 'rec-1820-org,liam,cullens,121,chandler street,the burrows,safety bay,3073,qld,19910811,7828812\\n',\n", - " 'rec-2192-org,ellie,fearnall,31,fishburn street,colbara,cherrybrook,5171,wa,,7745948\\n',\n", - " 'rec-2696-org,campbell,nguyen,6,diselma place,villa 2,collinswood,4343,nsw,19630325,2861961\\n',\n", - " 'rec-968-org,aidan,blake,15,namatjira drive,cooramin,dromana,4074,vic,19270928,4317464\\n',\n", - " 'rec-3833-org,nicholas,clarke,13,gaylard place,tryphinia view,wetherill park,2810,nsw,19041223,3927795\\n',\n", - " 'rec-4635-org,isabella,white,8,cooling place,,rosebud,6151,sa,19990911,2206317\\n',\n", - " 'rec-3549-org,harry,thorpe,11,kambalda crescent,louisa tor 4,angaston,2777,qld,19421128,2701790\\n',\n", - " 'rec-1220-org,lauren,weltman,6,tewksbury circuit,heritage estate,evans head,6330,nsw,19840930,9462453\\n']" + "['rec-3933-org,joshua,rigley,19,east place,kergunyah,kingaroy,3665,vic,19670613,4096438\\n',\n", + " 'rec-1057-org,samara,pringle,7,allan street,bonnie doon,campbelltown,5073,nsw,19560429,3493586\\n',\n", + " 'rec-4035-org,chloe,worm,6,brentnall place,donna valley,karloo,3128,nsw,19000814,9383057\\n',\n", + " 'rec-3793-org,lucy,mccarthy,29,charlton street,warrah lea,bundaberg,4061,qld,19940917,6596660\\n',\n", + " 'rec-27-org,angelina,campbell,161,jackie howe crescent,bugoren,woorim,6052,nsw,19531108,8948230\\n',\n", + " 'rec-2303-org,tahlia,hage,3,maclaurin crescent,,ormond,4740,tas,19190517,6174860\\n',\n", + " 'rec-658-org,david,hobson,14,vagabond crescent,dugout 65,patterson lakes,4880,wa,19010305,7666240\\n',\n", + " 'rec-4484-org,alexandra,clarke,15,parnell road,rsdb 284,nedlands,4014,sa,19890608,7235143\\n',\n", + " 'rec-702-org,barnaby,fleet,4,martley circuit,peak view,ascot vale,3930,sa,19360907,9383837\\n',\n", + " 'rec-3252-org,,campbell,4,dunbar street,delicate nobby street,cloverdale,2528,vic,19480406,8607518\\n']" ] }, "execution_count": 24, @@ -956,16 +1105,16 @@ { "data": { "text/plain": [ - "['rec-2689-dup-0,ainsley,labalck,23,atherto n street,villa 1/4,deer park,3418,nsw,19310531,4102867\\n',\n", - " 'rec-1056-dup-0,james,imgrapen,47,curlewiscrescent,dragon rising,burleigh waters,2680,qld,19520516,6111417\\n',\n", - " 'rec-1820-dup-0,liam,cullens,121,chandlerw street,the burrows,safety bay,3073,qld,19910811,7828812\\n',\n", - " 'rec-2192-dup-0,elpie,fearnull,31,fishbunestreet,,cherrybrook,5171,wa,,7745948\\n',\n", - " 'rec-2696-dup-0,jenna,nguyen,85,diselmaplace,villz2,collinswood,4343,nsw,19630325,2861961\\n',\n", - " 'rec-968-dup-0,aidan,blake,15,namatjifra drive,cooramin,dromana,4074,vic,19270928,4317464\\n',\n", - " 'rec-3833-dup-0,nicholas,clarke,,gaylard place,tryphinia view,wetherill park,2810,nsw,19041223,3972795\\n',\n", - " 'rec-4635-dup-0,isaeblla,white,8,cooling place,massey green,rosebud,6151,sa,19990911,2206317\\n',\n", - " 'rec-3549-dup-0,taylor,thorpe,11,kambalda c rescent,louisa tor 4,angasgon,2777,qld,19421128,2701790\\n',\n", - " 'rec-1220-dup-0,lauren,welman,6,tewksburl circuit,heritage estate,evans head,6330,nsw,19840930,9462453\\n']" + "['rec-3933-dup-0,joshua,rigly,19,east place,kergunyah,kingaroy,3665,vic,19670613,4096438\\n',\n", + " 'rec-1057-dup-0,pringle,samara,7,allan street,bonnie doon,campbelltown,5073,nsw,19560429,3493586\\n',\n", + " 'rec-4035-dup-0,chooe,worm,6,brentnal place,donna valley,karloo,3128,nsw,19000814,9383057\\n',\n", + " 'rec-3793-dup-0,mccarthy,lucy,29,charltonstreet,warrahlea,bundaverg,4061,qld,19940917,6596660\\n',\n", + " 'rec-27-dup-0,angelina,campbell,190,jackie howe crescent,bugoren,woorim,6352,nsw,19531108,8948230\\n',\n", + " 'rec-2303-dup-0,peter,ha ge,3,maclaurin crescent,,ormond,4704,tas,19190517,6174860\\n',\n", + " 'rec-658-dup-0,david,hobsson,14,vagabond cfescent,dugout 65,patterson lakes,4880,wa,19010305,7666240\\n',\n", + " 'rec-4484-dup-0,alexandra,clarke,15,rsd b 284,parnell roa,,4014,sa,19890608,7235143\\n',\n", + " 'rec-702-dup-0,barnay,fleet,4,martley circuit,peak view,ascot vale,3930,sa,19360907,9383837\\n',\n", + " 'rec-3252-dup-0,,campbell,4,dunbar svtreet,delicate nobby street,cloverdale,2528,vic,19480406,8607518\\n']" ] }, "execution_count": 25, @@ -1003,16 +1152,16 @@ "name": "stdout", "output_type": "stream", "text": [ - "Ainsley Robison (rec-2689-org) =? Ainsley Labalck (rec-2689-dup-0)\n", - "Chloe Imgraben (rec-1056-org) =? James Imgrapen (rec-1056-dup-0)\n", - "Liam Cullens (rec-1820-org) =? Liam Cullens (rec-1820-dup-0)\n", - "Ellie Fearnall (rec-2192-org) =? Elpie Fearnull (rec-2192-dup-0)\n", - "Campbell Nguyen (rec-2696-org) =? Jenna Nguyen (rec-2696-dup-0)\n", - "Aidan Blake (rec-968-org) =? Aidan Blake (rec-968-dup-0)\n", - "Nicholas Clarke (rec-3833-org) =? Nicholas Clarke (rec-3833-dup-0)\n", - "Isabella White (rec-4635-org) =? Isaeblla White (rec-4635-dup-0)\n", - "Harry Thorpe (rec-3549-org) =? Taylor Thorpe (rec-3549-dup-0)\n", - "Lauren Weltman (rec-1220-org) =? Lauren Welman (rec-1220-dup-0)\n" + "Joshua Rigley (rec-3933-org) =? Joshua Rigly (rec-3933-dup-0)\n", + "Samara Pringle (rec-1057-org) =? Pringle Samara (rec-1057-dup-0)\n", + "Chloe Worm (rec-4035-org) =? Chooe Worm (rec-4035-dup-0)\n", + "Lucy Mccarthy (rec-3793-org) =? Mccarthy Lucy (rec-3793-dup-0)\n", + "Angelina Campbell (rec-27-org) =? Angelina Campbell (rec-27-dup-0)\n", + "Tahlia Hage (rec-2303-org) =? Peter Ha Ge (rec-2303-dup-0)\n", + "David Hobson (rec-658-org) =? David Hobsson (rec-658-dup-0)\n", + "Alexandra Clarke (rec-4484-org) =? Alexandra Clarke (rec-4484-dup-0)\n", + "Barnaby Fleet (rec-702-org) =? Barnay Fleet (rec-702-dup-0)\n", + " Campbell (rec-3252-org) =? Campbell (rec-3252-dup-0)\n" ] } ], @@ -1054,9 +1203,9 @@ "name": "stdout", "output_type": "stream", "text": [ - "Found 4858 correct matches out of 5000. Incorrectly linked 0 matches.\n", + "Found 4851 correct matches out of 5000. Incorrectly linked 0 matches.\n", "Precision: 100.0%\n", - "Recall: 97.2%\n" + "Recall: 97.0%\n" ] } ], @@ -1100,6 +1249,15 @@ "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.3" + }, + "pycharm": { + "stem_cell": { + "cell_type": "raw", + "metadata": { + "collapsed": false + }, + "source": [] + } } }, "nbformat": 4, diff --git a/docs/tutorial/Record Linkage API.ipynb b/docs/tutorial/Record Linkage API.ipynb index c957c7b1..b5e074f1 100644 --- a/docs/tutorial/Record Linkage API.ipynb +++ b/docs/tutorial/Record Linkage API.ipynb @@ -70,10 +70,10 @@ "outputs": [ { "name": "stdout", + "output_type": "stream", "text": [ "Testing anonlink-entity-service hosted at https://testing.es.data61.xyz/api/v1/\n" - ], - "output_type": "stream" + ] } ], "source": [ @@ -93,11 +93,13 @@ "outputs": [ { "data": { - "text/plain": "{'project_count': 7871, 'rate': 301990, 'status': 'ok'}" + "text/plain": [ + "{'project_count': 6535, 'rate': 2504556, 'status': 'ok'}" + ] }, + "execution_count": 3, "metadata": {}, - "output_type": "execute_result", - "execution_count": 3 + "output_type": "execute_result" } ], "source": [ @@ -187,32 +189,50 @@ "outputs": [], "source": [ "import clkhash\n", + "from clkhash.comparators import *\n", "from clkhash.field_formats import *\n", "schema = clkhash.randomnames.NameList.SCHEMA\n", "_missing = MissingValueSpec(sentinel='')\n", "schema.fields = [\n", " Ignore('rec_id'),\n", - " StringSpec('given_name', \n", - " FieldHashingProperties(ngram=2, k=15)),\n", - " StringSpec('surname', \n", - " FieldHashingProperties(ngram=2, k=15)),\n", - " IntegerSpec('street_number', \n", - " FieldHashingProperties(ngram=1, \n", - " positional=True, \n", - " k=15, \n", - " missing_value=_missing)),\n", - " StringSpec('address_1', \n", - " FieldHashingProperties(ngram=2, k=15)),\n", - " StringSpec('address_2', \n", - " FieldHashingProperties(ngram=2, k=15)),\n", - " StringSpec('suburb', \n", - " FieldHashingProperties(ngram=2, k=15)),\n", - " IntegerSpec('postcode', \n", - " FieldHashingProperties(ngram=1, positional=True, k=15)),\n", - " StringSpec('state', \n", - " FieldHashingProperties(ngram=2, k=15)),\n", - " IntegerSpec('date_of_birth', \n", - " FieldHashingProperties(ngram=1, positional=True, k=15, missing_value=_missing)),\n", + " StringSpec('given_name',\n", + " FieldHashingProperties(\n", + " NgramComparison(2),\n", + " BitsPerTokenStrategy(15))),\n", + " StringSpec('surname',\n", + " FieldHashingProperties(\n", + " NgramComparison(2),\n", + " BitsPerTokenStrategy(15))),\n", + " IntegerSpec('street_number',\n", + " FieldHashingProperties(\n", + " NgramComparison(1, positional=True),\n", + " BitsPerTokenStrategy(15),\n", + " missing_value=_missing)),\n", + " StringSpec('address_1',\n", + " FieldHashingProperties(\n", + " NgramComparison(2),\n", + " BitsPerTokenStrategy(15))),\n", + " StringSpec('address_2',\n", + " FieldHashingProperties(\n", + " NgramComparison(2),\n", + " BitsPerTokenStrategy(15))),\n", + " StringSpec('suburb',\n", + " FieldHashingProperties(\n", + " NgramComparison(2),\n", + " BitsPerTokenStrategy(15))),\n", + " IntegerSpec('postcode',\n", + " FieldHashingProperties(\n", + " NgramComparison(1, positional=True),\n", + " BitsPerTokenStrategy(15))),\n", + " StringSpec('state',\n", + " FieldHashingProperties(\n", + " NgramComparison(2),\n", + " BitsPerTokenStrategy(15))),\n", + " IntegerSpec('date_of_birth',\n", + " FieldHashingProperties(\n", + " NgramComparison(1, positional=True),\n", + " BitsPerTokenStrategy(15),\n", + " missing_value=_missing)),\n", " Ignore('soc_sec_id')\n", "]" ] @@ -239,27 +259,20 @@ "outputs": [ { "name": "stderr", + "output_type": "stream", "text": [ - "\rgenerating CLKs: 0%| | 0.00/5.00k [00:00" + "text/plain": [ + "" + ] }, + "execution_count": 20, "metadata": {}, - "output_type": "execute_result", - "execution_count": 20 + "output_type": "execute_result" } ], "source": [ @@ -626,13 +676,6 @@ " \"{}/projects/{}\".format(url, project_id), \n", " headers={\"Authorization\": credentials['result_token']})" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { @@ -651,18 +694,18 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.0" + "version": "3.7.3" }, "pycharm": { "stem_cell": { "cell_type": "raw", - "source": [], "metadata": { "collapsed": false - } + }, + "source": [] } } }, "nbformat": 4, "nbformat_minor": 2 -} \ No newline at end of file +} diff --git a/docs/tutorial/Similarity Scores.ipynb b/docs/tutorial/Similarity Scores.ipynb index 63174a6d..702bd111 100644 --- a/docs/tutorial/Similarity Scores.ipynb +++ b/docs/tutorial/Similarity Scores.ipynb @@ -100,7 +100,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{\"project_count\": 2115, \"rate\": 7737583, \"status\": \"ok\"}\r\n" + "{\"project_count\": 6536, \"rate\": 2530484, \"status\": \"ok\"}\r\n" ] } ], @@ -312,26 +312,23 @@ "name": "stdout", "output_type": "stream", "text": [ - "Overwriting /tmp/tmpvlivqdcf\n" + "Overwriting /tmp/tmpp5kob1ay\n" ] } ], "source": [ "%%writefile {schema.name}\n", "{\n", - " \"version\": 1,\n", + " \"version\": 3,\n", " \"clkConfig\": {\n", " \"l\": 1024,\n", - " \"k\": 30,\n", - " \"hash\": {\n", - " \"type\": \"doubleHash\"\n", - " },\n", + " \"xor_folds\": 0,\n", " \"kdf\": {\n", " \"type\": \"HKDF\",\n", " \"hash\": \"SHA256\",\n", - " \"info\": \"c2NoZW1hX2V4YW1wbGU=\",\n", - " \"salt\": \"SCbL2zHNnmsckfzchsNkZY9XoHk96P/G5nUBrM7ybymlEFsMV6PAeDZCNp3rfNUPCtLDMOGQHG4pCQpfhiHCyA==\",\n", - " \"keySize\": 64\n", + " \"info\": \"c2NoZW1hX2V4YW1wbGU=\",\n", + " \"salt\": \"SCbL2zHNnmsckfzchsNkZY9XoHk96P/G5nUBrM7ybymlEFsMV6PAeDZCNp3rfNUPCtLDMOGQHG4pCQpfhiHCyA==\",\n", + " \"keySize\": 64\n", " }\n", " },\n", " \"features\": [\n", @@ -341,48 +338,189 @@ " },\n", " {\n", " \"identifier\": \"given_name\",\n", - " \"format\": { \"type\": \"string\", \"encoding\": \"utf-8\" },\n", - " \"hashing\": { \"ngram\": 2, \"weight\": 1 }\n", + " \"format\": {\n", + " \"type\": \"string\",\n", + " \"encoding\": \"utf-8\"\n", + " },\n", + " \"hashing\": {\n", + " \"strategy\": {\n", + " \"bitsPerToken\": 30\n", + " },\n", + " \"hash\": {\n", + " \"type\": \"doubleHash\"\n", + " },\n", + " \"comparison\": {\n", + " \"type\": \"ngram\",\n", + " \"n\": 2,\n", + " \"positional\": false\n", + " }\n", + " }\n", " },\n", " {\n", " \"identifier\": \"surname\",\n", - " \"format\": { \"type\": \"string\", \"encoding\": \"utf-8\" },\n", - " \"hashing\": { \"ngram\": 2, \"weight\": 1 }\n", + " \"format\": {\n", + " \"type\": \"string\",\n", + " \"encoding\": \"utf-8\"\n", + " },\n", + " \"hashing\": {\n", + " \"strategy\": {\n", + " \"bitsPerToken\": 30\n", + " },\n", + " \"hash\": {\n", + " \"type\": \"doubleHash\"\n", + " },\n", + " \"comparison\": {\n", + " \"type\": \"ngram\",\n", + " \"n\": 2,\n", + " \"positional\": false\n", + " }\n", + " }\n", " },\n", " {\n", " \"identifier\": \"street_number\",\n", - " \"format\": { \"type\": \"integer\" },\n", - " \"hashing\": { \"ngram\": 1, \"positional\": true, \"weight\": 1, \"missingValue\": {\"sentinel\": \"\"} }\n", + " \"format\": {\n", + " \"type\": \"integer\"\n", + " },\n", + " \"hashing\": {\n", + " \"missingValue\": {\n", + " \"sentinel\": \"\"\n", + " },\n", + " \"strategy\": {\n", + " \"bitsPerToken\": 30\n", + " },\n", + " \"hash\": {\n", + " \"type\": \"doubleHash\"\n", + " },\n", + " \"comparison\": {\n", + " \"type\": \"ngram\",\n", + " \"n\": 1,\n", + " \"positional\": true\n", + " }\n", + " }\n", " },\n", " {\n", " \"identifier\": \"address_1\",\n", - " \"format\": { \"type\": \"string\", \"encoding\": \"utf-8\" },\n", - " \"hashing\": { \"ngram\": 2, \"weight\": 1 }\n", + " \"format\": {\n", + " \"type\": \"string\",\n", + " \"encoding\": \"utf-8\"\n", + " },\n", + " \"hashing\": {\n", + " \"strategy\": {\n", + " \"bitsPerToken\": 30\n", + " },\n", + " \"hash\": {\n", + " \"type\": \"doubleHash\"\n", + " },\n", + " \"comparison\": {\n", + " \"type\": \"ngram\",\n", + " \"n\": 2,\n", + " \"positional\": false\n", + " }\n", + " }\n", " },\n", " {\n", " \"identifier\": \"address_2\",\n", - " \"format\": { \"type\": \"string\", \"encoding\": \"utf-8\" },\n", - " \"hashing\": { \"ngram\": 2, \"weight\": 1 }\n", + " \"format\": {\n", + " \"type\": \"string\",\n", + " \"encoding\": \"utf-8\"\n", + " },\n", + " \"hashing\": {\n", + " \"strategy\": {\n", + " \"bitsPerToken\": 30\n", + " },\n", + " \"hash\": {\n", + " \"type\": \"doubleHash\"\n", + " },\n", + " \"comparison\": {\n", + " \"type\": \"ngram\",\n", + " \"n\": 2,\n", + " \"positional\": false\n", + " }\n", + " }\n", " },\n", " {\n", " \"identifier\": \"suburb\",\n", - " \"format\": { \"type\": \"string\", \"encoding\": \"utf-8\" },\n", - " \"hashing\": { \"ngram\": 2, \"weight\": 1 }\n", + " \"format\": {\n", + " \"type\": \"string\",\n", + " \"encoding\": \"utf-8\"\n", + " },\n", + " \"hashing\": {\n", + " \"strategy\": {\n", + " \"bitsPerToken\": 30\n", + " },\n", + " \"hash\": {\n", + " \"type\": \"doubleHash\"\n", + " },\n", + " \"comparison\": {\n", + " \"type\": \"ngram\",\n", + " \"n\": 2,\n", + " \"positional\": false\n", + " }\n", + " }\n", " },\n", " {\n", " \"identifier\": \"postcode\",\n", - " \"format\": { \"type\": \"integer\", \"minimum\": 100, \"maximum\": 9999 },\n", - " \"hashing\": { \"ngram\": 1, \"positional\": true, \"weight\": 1 }\n", + " \"format\": {\n", + " \"type\": \"integer\",\n", + " \"minimum\": 100,\n", + " \"maximum\": 9999\n", + " },\n", + " \"hashing\": {\n", + " \"strategy\": {\n", + " \"bitsPerToken\": 30\n", + " },\n", + " \"hash\": {\n", + " \"type\": \"doubleHash\"\n", + " },\n", + " \"comparison\": {\n", + " \"type\": \"ngram\",\n", + " \"n\": 1,\n", + " \"positional\": true\n", + " }\n", + " }\n", " },\n", " {\n", " \"identifier\": \"state\",\n", - " \"format\": { \"type\": \"string\", \"encoding\": \"utf-8\", \"maxLength\": 3 },\n", - " \"hashing\": { \"ngram\": 2, \"weight\": 1 }\n", + " \"format\": {\n", + " \"type\": \"string\",\n", + " \"encoding\": \"utf-8\",\n", + " \"maxLength\": 3\n", + " },\n", + " \"hashing\": {\n", + " \"strategy\": {\n", + " \"bitsPerToken\": 30\n", + " },\n", + " \"hash\": {\n", + " \"type\": \"doubleHash\"\n", + " },\n", + " \"comparison\": {\n", + " \"type\": \"ngram\",\n", + " \"n\": 2,\n", + " \"positional\": false\n", + " }\n", + " }\n", " },\n", " {\n", " \"identifier\": \"date_of_birth\",\n", - " \"format\": { \"type\": \"integer\" },\n", - " \"hashing\": { \"ngram\": 1, \"positional\": true, \"weight\": 1, \"missingValue\": {\"sentinel\": \"\"} }\n", + " \"format\": {\n", + " \"type\": \"integer\"\n", + " },\n", + " \"hashing\": {\n", + " \"missingValue\": {\n", + " \"sentinel\": \"\"\n", + " },\n", + " \"strategy\": {\n", + " \"bitsPerToken\": 30\n", + " },\n", + " \"hash\": {\n", + " \"type\": \"doubleHash\"\n", + " },\n", + " \"comparison\": {\n", + " \"type\": \"ngram\",\n", + " \"n\": 1,\n", + " \"positional\": true\n", + " }\n", + " }\n", " },\n", " {\n", " \"identifier\": \"soc_sec_id\",\n", @@ -416,17 +554,17 @@ "name": "stdout", "output_type": "stream", "text": [ - "Credentials will be saved in /tmp/tmpcwpvq6kj\n", + "Credentials will be saved in /tmp/tmp8pi2emsl\n", "\u001b[31mProject created\u001b[0m\n" ] }, { "data": { "text/plain": [ - "{'project_id': '1eb3da44f73440c496ab42217381181de55e9dcd6743580c',\n", - " 'result_token': '846c6c25097c7794131de0d3e2c39c04b7de9688acedc383',\n", - " 'update_tokens': ['52aae3f1dfa8a4ec1486d8f7d63a8fe708876b39a8ec585b',\n", - " '92e2c9c1ce52a2c2493b5e22953600735a07553f7d00a704']}" + "{'project_id': '500db47fcfed842b47f0ae20f6ba82a66dddc5d4d6e956a7',\n", + " 'result_token': '7c161ffe7873683fd8102a635815d7e7a577612458147c32',\n", + " 'update_tokens': ['1e50b588283e191f79769fc925949baded7c704bca28060d',\n", + " '3b95f9e2a51429738c3ea9338b2c3f05cda6cfcef0c8918c']}" ] }, "execution_count": 8, @@ -474,16 +612,14 @@ "name": "stdout", "output_type": "stream", "text": [ - "generating CLKs: 100%|█| 5.00k/5.00k [00:01<00:00, 1.06kclk/s, mean=883, std=33.6]\n", - "\u001b[31mCLK data written to /tmp/tmpj8m1dvxj.json\u001b[0m\n", - "generating CLKs: 100%|█| 5.00k/5.00k [00:01<00:00, 1.30kclk/s, mean=875, std=39.7]\n", - "\u001b[31mCLK data written to /tmp/tmpi2y_ogl9.json\u001b[0m\n" + "\u001b[31mCLK data written to /tmp/tmp2_h66ds2.json\u001b[0m\n", + "\u001b[31mCLK data written to /tmp/tmpiyu3o3vv.json\u001b[0m\n" ] } ], "source": [ - "!clkutil hash \"{a_csv.name}\" horse staple \"{schema.name}\" \"{a_clks.name}\"\n", - "!clkutil hash \"{b_csv.name}\" horse staple \"{schema.name}\" \"{b_clks.name}\"" + "!clkutil hash \"{a_csv.name}\" secret \"{schema.name}\" \"{a_clks.name}\"\n", + "!clkutil hash \"{b_csv.name}\" secret \"{schema.name}\" \"{b_clks.name}\"" ] }, { @@ -629,25 +765,26 @@ "text": [ "State: completed\n", "Stage (2/2): compute similarity scores\n", - "Progress: 1.000%\n" + "Progress: 100.00%\n" ] } ], "source": [ - "for update in clkhash.rest_client.watch_run_status(url, project_id, run_id, credentials['result_token'], timeout=300):\n", + "from clkhash.rest_client import RestClient\n", + "from clkhash.rest_client import format_run_status\n", + "rest_client = RestClient(url)\n", + "for update in rest_client.watch_run_status(project_id, run_id, credentials['result_token'], timeout=300):\n", " clear_output(wait=True)\n", - " print(clkhash.rest_client.format_run_status(update))\n", - "time.sleep(3)" + " print(format_run_status(update))" ] }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ - "data = json.loads(clkhash.rest_client.run_get_result_text(\n", - " url, \n", + "data = json.loads(rest_client.run_get_result_text(\n", " project_id, \n", " run_id, \n", " credentials['result_token']))['similarity_scores']" @@ -664,7 +801,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 15, "metadata": { "pycharm": { "is_executing": false @@ -704,7 +841,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 16, "metadata": { "pycharm": { "is_executing": false @@ -714,10 +851,10 @@ { "data": { "text/plain": [ - "1572906" + "1150393" ] }, - "execution_count": 19, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -737,7 +874,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 17, "metadata": { "pycharm": { "is_executing": false @@ -746,7 +883,7 @@ "outputs": [ { "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYAAAAD8CAYAAAB+UHOxAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAElFJREFUeJzt3W+QnWd53/HvD9kmbSG1HG89RhJZN4hpxYsIujWmKY0Lgy3saQVtSkynQbieKpnYM2EmeSGSF05JPeO0BQYmxFMnVjFMwHESUjSxUqM4MDQdjC2DMZZVx4sRYynCViJD8DClkXP1xbkFJ2JXe3b37Dla39/PzJl9zvX8OfelI52fnj/n2VQVkqT+vGjaA5AkTYcBIEmdMgAkqVMGgCR1ygCQpE4ZAJLUKQNAkjplAEhSpwwASerUedMewNlcfPHFNTs7O+1hSNK68tBDD/15Vc0stdw5HQCzs7McPHhw2sOQpHUlyddGWc5DQJLUKQNAkjplAEhSpwwASeqUASBJnTIAJKlTSwZAkh9I8kCSLyU5lOQ/tvplST6fZD7Jbye5oNVf3J7Pt/mzQ9t6d6s/nuTqtWpKkrS0UfYAvgO8oap+FNgO7EhyBfCrwPur6hXAs8ANbfkbgGdb/f1tOZJsA64DXgXsAH49yYZxNiNJGt2SAVADz7Wn57dHAW8AfrfV7wTe0qZ3tue0+W9Mkla/q6q+U1VfBeaBy8fShSRp2Ub6JnD7n/pDwCuADwFfAb5RVafaIkeBTW16E/AUQFWdSvJN4Ida/f6hzQ6vsyZm99yzYP3Irdeu5ctK0row0kngqnq+qrYDmxn8r/0frNWAkuxOcjDJwRMnTqzVy0hS95Z1FVBVfQP4NPA64MIkp/cgNgPH2vQxYAtAm/93gb8Yri+wzvBr3F5Vc1U1NzOz5L2MJEkrNMpVQDNJLmzTfwt4E3CYQRD8RFtsF/DJNr2vPafN/+Oqqla/rl0ldBmwFXhgXI1IkpZnlHMAlwJ3tvMALwLurqo/SPIYcFeS/wR8EbijLX8H8NEk88BJBlf+UFWHktwNPAacAm6squfH244kaVRLBkBVPQK8eoH6kyxwFU9V/V/g3yyyrVuAW5Y/TEnSuPlNYEnqlAEgSZ0yACSpUwaAJHXKAJCkThkAktQpA0CSOmUASFKnDABJ6pQBIEmdMgAkqVMGgCR1ygCQpE4ZAJLUKQNAkjplAEhSpwwASeqUASBJnTIAJKlTBoAkdcoAkKROGQCS1CkDQJI6ZQBIUqcMAEnqlAEgSZ1aMgCSbEny6SSPJTmU5Oda/ZeTHEvycHtcM7TOu5PMJ3k8ydVD9R2tNp9kz9q0JEkaxXkjLHMK+Pmq+kKSlwIPJTnQ5r2/qv7r8MJJtgHXAa8CXgb8UZJXttkfAt4EHAUeTLKvqh4bRyPLMbvnngXrR269dsIjkaTpWTIAquo4cLxNfyvJYWDTWVbZCdxVVd8BvppkHri8zZuvqicBktzVlp14AEiSlnkOIMks8Grg8610U5JHkuxNsrHVNgFPDa12tNUWq5/5GruTHExy8MSJE8sZniRpGUYOgCQvAX4PeFdV/SVwG/AjwHYGewjvHceAqur2qpqrqrmZmZlxbFKStIBRzgGQ5HwGH/6/VVWfAKiqp4fm/wbwB+3pMWDL0OqbW42z1CVJEzbKVUAB7gAOV9X7huqXDi32VuDRNr0PuC7Ji5NcBmwFHgAeBLYmuSzJBQxOFO8bTxuSpOUaZQ/gx4CfAr6c5OFW+0Xg7Um2AwUcAX4aoKoOJbmbwcndU8CNVfU8QJKbgHuBDcDeqjo0xl4kScswylVAfwJkgVn7z7LOLcAtC9T3n209SdLk+E1gSeqUASBJnTIAJKlTBoAkdcoAkKROGQCS1CkDQJI6ZQBIUqcMAEnqlAEgSZ0yACSpUwaAJHXKAJCkThkAktQpA0CSOmUASFKnDABJ6pQBIEmdMgAkqVMGgCR1ygCQpE4ZAJLUqfOmPYBzyeyeexasH7n12gmPRJLWnnsAktQpA0CSOrVkACTZkuTTSR5LcijJz7X6RUkOJHmi/dzY6knywSTzSR5J8pqhbe1qyz+RZNfatSVJWsooewCngJ+vqm3AFcCNSbYBe4D7qmorcF97DvBmYGt77AZug0FgADcDrwUuB24+HRqSpMlbMgCq6nhVfaFNfws4DGwCdgJ3tsXuBN7SpncCH6mB+4ELk1wKXA0cqKqTVfUscADYMdZuJEkjW9Y5gCSzwKuBzwOXVNXxNuvrwCVtehPw1NBqR1ttsbokaQpGDoAkLwF+D3hXVf3l8LyqKqDGMaAku5McTHLwxIkT49ikJGkBIwVAkvMZfPj/VlV9opWfbod2aD+fafVjwJah1Te32mL1v6Gqbq+quaqam5mZWU4vkqRlGOUqoAB3AIer6n1Ds/YBp6/k2QV8cqj+jnY10BXAN9uhonuBq5JsbCd/r2o1SdIUjPJN4B8Dfgr4cpKHW+0XgVuBu5PcAHwNeFubtx+4BpgHvg1cD1BVJ5P8CvBgW+49VXVyLF1IkpZtyQCoqj8BssjsNy6wfAE3LrKtvcDe5QxQkrQ2/CawJHXKAJCkThkAktQpA0CSOmUASFKnDABJ6pQBIEmdMgAkqVMGgCR1ygCQpE4ZAJLUKQNAkjplAEhSp0a5HXT3Zvfcs2D9yK3XTngkkjQ+7gFIUqcMAEnqlAEgSZ0yACSpUwaAJHXKAJCkThkAktQpA0CSOmUASFKnDABJ6pQBIEmdWjIAkuxN8kySR4dqv5zkWJKH2+OaoXnvTjKf5PEkVw/Vd7TafJI9429FkrQco+wBfBjYsUD9/VW1vT32AyTZBlwHvKqt8+tJNiTZAHwIeDOwDXh7W1aSNCVL3g20qj6bZHbE7e0E7qqq7wBfTTIPXN7mzVfVkwBJ7mrLPrbsEUuSxmI15wBuSvJIO0S0sdU2AU8NLXO01RarS5KmZKUBcBvwI8B24Djw3nENKMnuJAeTHDxx4sS4NitJOsOKAqCqnq6q56vqr4Hf4HuHeY4BW4YW3dxqi9UX2vbtVTVXVXMzMzMrGZ4kaQQr+o1gSS6tquPt6VuB01cI7QM+luR9wMuArcADQICtSS5j8MF/HfBvVzPwc4G/KUzSerZkACT5OHAlcHGSo8DNwJVJtgMFHAF+GqCqDiW5m8HJ3VPAjVX1fNvOTcC9wAZgb1UdGns3kqSRjXIV0NsXKN9xluVvAW5ZoL4f2L+s0UmS1ozfBJakThkAktQpA0CSOmUASFKnDABJ6pQBIEmdMgAkqVMGgCR1ygCQpE4ZAJLUKQNAkjplAEhSpwwASeqUASBJnTIAJKlTBoAkdcoAkKROGQCS1CkDQJI6ZQBIUqcMAEnq1HnTHsAL0eyeexasH7n12gmPRJIW5x6AJHXKAJCkThkAktQpA0CSOrVkACTZm+SZJI8O1S5KciDJE+3nxlZPkg8mmU/ySJLXDK2zqy3/RJJda9OOJGlUo+wBfBjYcUZtD3BfVW0F7mvPAd4MbG2P3cBtMAgM4GbgtcDlwM2nQ0OSNB1LBkBVfRY4eUZ5J3Bnm74TeMtQ/SM1cD9wYZJLgauBA1V1sqqeBQ7w/aEiSZqglZ4DuKSqjrfprwOXtOlNwFNDyx1ttcXq3yfJ7iQHkxw8ceLECocnSVrKqk8CV1UBNYaxnN7e7VU1V1VzMzMz49qsJOkMKw2Ap9uhHdrPZ1r9GLBlaLnNrbZYXZI0JSsNgH3A6St5dgGfHKq/o10NdAXwzXao6F7gqiQb28nfq1pNkjQlS94LKMnHgSuBi5McZXA1z63A3UluAL4GvK0tvh+4BpgHvg1cD1BVJ5P8CvBgW+49VXXmiWVJ0gRlcAj/3DQ3N1cHDx5c8fqL3ZTtXONN4iSNU5KHqmpuqeX8JrAkdcoAkKROGQCS1CkDQJI6ZQBIUqcMAEnqlAEgSZ0yACSpUwaAJHXKAJCkThkAktQpA0CSOmUASFKnlrwdtNbeYnct9S6hktaSewCS1CkDQJI6ZQBIUqcMAEnqlAEgSZ0yACSpUwaAJHXKAJCkThkAktQpA0CSOmUASFKnVnUvoCRHgG8BzwOnqmouyUXAbwOzwBHgbVX1bJIAHwCuAb4NvLOqvrCa13+h8x5BktbSOPYA/nlVba+qufZ8D3BfVW0F7mvPAd4MbG2P3cBtY3htSdIKrcUhoJ3AnW36TuAtQ/WP1MD9wIVJLl2D15ckjWC1AVDAp5I8lGR3q11SVcfb9NeBS9r0JuCpoXWPtpokaQpW+/sA/mlVHUvy94ADSf7P8MyqqiS1nA22INkN8PKXv3yVw5MkLWZVewBVdaz9fAb4feBy4OnTh3baz2fa4seALUOrb261M7d5e1XNVdXczMzMaoYnSTqLFQdAkr+T5KWnp4GrgEeBfcCuttgu4JNteh/wjgxcAXxz6FCRJGnCVnMI6BLg9wdXd3Ie8LGq+p9JHgTuTnID8DXgbW35/QwuAZ1ncBno9at4bUnSKq04AKrqSeBHF6j/BfDGBeoF3LjS15MkjZe/FH4dWuwLYuCXxCSNzltBSFKnDABJ6pQBIEmdMgAkqVMGgCR1ygCQpE55GegLjL9DQNKo3AOQpE4ZAJLUKQNAkjplAEhSpzwJ3AlPDks6k3sAktQpA0CSOmUASFKnDABJ6pQBIEmd8iqgznl1kNQv9wAkqVPuAWhB7hlIL3zuAUhSpwwASeqUh4C0LB4akl443AOQpE5NfA8gyQ7gA8AG4Der6tZJj0Hj556BtP5MNACSbAA+BLwJOAo8mGRfVT02yXFocgwG6dw16T2Ay4H5qnoSIMldwE7AAOiMwSBN36QDYBPw1NDzo8BrJzwGncMWC4aVMEykszvnrgJKshvY3Z4+l+TxVWzuYuDPVz+qdaW3nhftN7864ZFMTm/vMdjzcv3wKAtNOgCOAVuGnm9ute+qqtuB28fxYkkOVtXcOLa1XvTWc2/9gj33YhI9T/oy0AeBrUkuS3IBcB2wb8JjkCQx4T2AqjqV5CbgXgaXge6tqkOTHIMkaWDi5wCqaj+wf0IvN5ZDSetMbz331i/Ycy/WvOdU1Vq/hiTpHOStICSpU+syAJLsSPJ4kvkkexaY/8NJ7kvySJLPJNk8NG9XkifaY9dkR75yK+05yfYkn0tyqM37ycmPfmVW8z63+T+Y5GiSX5vcqFdnlX+3X57kU0kOJ3ksyewkx75Sq+z5P7e/24eTfDBJJjv65UuyN8kzSR5dZH5aL/Ot59cMzRvv51dVrasHg5PHXwH+PnAB8CVg2xnL/A6wq02/Afhom74IeLL93NimN067pzXu+ZXA1jb9MuA4cOG0e1rLnofmfwD4GPBr0+5nEj0DnwHe1KZfAvztafe0lj0D/wT4320bG4DPAVdOu6cRev5nwGuARxeZfw3wh0CAK4DPt/rYP7/W4x7Ad28nUVX/Dzh9O4lh24A/btOfHpp/NXCgqk5W1bPAAWDHBMa8Wivuuar+tKqeaNN/BjwDzExk1KuzmveZJP8IuAT41ATGOi4r7jnJNuC8qjoAUFXPVdW3JzPsVVnN+1zADzAIjhcD5wNPr/mIV6mqPgucPMsiO4GP1MD9wIVJLmUNPr/WYwAsdDuJTWcs8yXgX7XptwIvTfJDI657LlpNz9+V5HIG/1i+skbjHKcV95zkRcB7gV9Y81GO12re51cC30jyiSRfTPJf2s0Xz3Ur7rmqPscgEI63x71VdXiNxzsJi/2ZjP3zaz0GwCh+AfjxJF8EfpzBt42fn+6Q1txZe27/g/gocH1V/fV0hjh2i/X8s8D+qjo6zcGtkcV6Pg94fZv/jxkcUnnnlMY4bgv2nOQVwD9kcEeBTcAbkrx+esNcf865ewGNYJTbSfwZ7X8MSV4C/Ouq+kaSY8CVZ6z7mbUc7JisuOf2/AeBe4BfaruU68Fq3ufXAa9P8rMMjoVfkOS5qvq+E4znmNX0fBR4uL53p93/weD48R2TGPgqrKbn/wDcX1XPtXl/CLwO+F+TGPgaWuzPZPyfX9M+IbKCEyjnMTj5cRnfO2n0qjOWuRh4UZu+BXjP0EmUrzI4gbKxTV807Z7WuOcLgPuAd027j0n1fMYy72T9nARezfu8oS0/057/d+DGafe0xj3/JPBHbRvnt7/n/2LaPY3Y9yyLnwS+lr95EviBVh/759fU/yBW+Id3DfCnDI5l/1KrvQf4l236J4An2jK/Cbx4aN1/D8y3x/XT7mWtewb+HfBXwMNDj+3T7met3+ehbaybAFhtzwx+0dIjwJeBDwMXTLufteyZQej9N+Awg98p8r5p9zJivx9ncM7irxgcx78B+BngZ9r8MPjFWV9p7+Xc0Lpj/fzym8CS1KkX6klgSdISDABJ6pQBIEmdMgAkqVMGgCR1ygCQpE4ZAJLUKQNAkjr1/wHNa9U2GtFvqQAAAABJRU5ErkJggg==\n", + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX0AAAD4CAYAAAAAczaOAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAPGElEQVR4nO3df6xfd13H8eeLjmEUyDpbm9lt3KklsfzhnHUMFZkQtm6LFtDwI1HKXKyEkUgif1T5Y2aEpGrAsIALVSobEciMII0rjlohqGGwImPsh9DL6LLWshYL6LJEAd/+8f0UvnT3trf3+6t3n+cj+eZ7vp/zOef7efd7+zrnnnO+56aqkCT14WmzHoAkaXoMfUnqiKEvSR0x9CWpI4a+JHXknFkP4FTWrFlTc3Nzsx6GJK0on/vc575eVWsXmndWh/7c3Bz79++f9TAkaUVJ8shi8zy8I0kdMfQlqSOGviR1xNCXpI4Y+pLUEUNfkjpi6EtSRwx9SeqIoS9JHTmrv5E7qrntdy7YfnDHdVMeiSSdHdzTl6SOGPqS1BFDX5I6YuhLUkcMfUnqiKEvSR0x9CWpI4a+JHXE0Jekjhj6ktQRQ1+SOmLoS1JHDH1J6oihL0kdMfQlqSOGviR1xNCXpI4Y+pLUEUNfkjpi6EtSRwx9SerIaUM/yUVJPpHkwSQPJPm91n5+kr1JDrTn1a09SW5JMp/kviSXDa1ra+t/IMnWyZUlSVrIUvb0vwP8flVtBK4AbkyyEdgO7KuqDcC+9hrgGmBDe2wDboXBRgK4CXg+cDlw04kNhSRpOk4b+lV1pKr+rU3/N/AQsB7YAtzWut0GvKxNbwFur4G7gfOSXABcDeytquNV9Q1gL7B5rNVIkk7pjI7pJ5kDfhb4DLCuqo60WV8D1rXp9cCjQ4sdam2LtZ/8HtuS7E+y/9ixY2cyPEnSaSw59JM8E/hb4E1V9V/D86qqgBrHgKpqZ1VtqqpNa9euHccqJUnNkkI/ydMZBP5fV9WHW/Nj7bAN7floaz8MXDS0+IWtbbF2SdKULOXqnQDvBR6qqncMzdoNnLgCZyvw0aH217areK4AvtUOA90FXJVkdTuBe1VrkyRNyTlL6POLwG8BX0xyb2v7Q2AHcEeSG4BHgFe2eXuAa4F54AngeoCqOp7krcA9rd/NVXV8LFVIkpbktKFfVf8CZJHZL1mgfwE3LrKuXcCuMxmgJGl8/EauJHXE0Jekjhj6ktQRQ1+SOrKUq3eecua237lg+8Ed1015JJI0Xe7pS1JHDH1J6oihL0kdMfQlqSOGviR1xNCXpI4Y+pLUEUNfkjpi6EtSRwx9SeqIoS9JHTH0Jakjhr4kdcTQl6SOGPqS1BFDX5I6YuhLUkcMfUnqiKEvSR0x9CWpI4a+JHXE0Jekjhj6ktQRQ1+SOmLoS1JHDH1J6oihL0kdMfQlqSOGviR1xNCXpI6cM+sBnE3mtt+5YPvBHddNeSSSNBmn3dNPsivJ0ST3D7X9UZLDSe5tj2uH5v1BkvkkX0py9VD75tY2n2T7+EuRJJ3OUg7vvA/YvED7n1XVpe2xByDJRuDVwPPaMn+eZFWSVcC7gWuAjcBrWl9J0hSd9vBOVX0qydwS17cF+FBV/Q/w1STzwOVt3nxVPQyQ5EOt74NnPGJJ0rKNciL3jUnua4d/Vre29cCjQ30OtbbF2p8kybYk+5PsP3bs2AjDkySdbLmhfyvwk8ClwBHg7eMaUFXtrKpNVbVp7dq141qtJIllXr1TVY+dmE7yF8Dft5eHgYuGul7Y2jhFuyRpSpa1p5/kgqGXLwdOXNmzG3h1kmckuQTYAHwWuAfYkOSSJOcyONm7e/nDliQtx2n39JN8ELgSWJPkEHATcGWSS4ECDgK/C1BVDyS5g8EJ2u8AN1bVd9t63gjcBawCdlXVA2OvRpJ0Sku5euc1CzS/9xT93wa8bYH2PcCeMxqdJGmsvA2DJHXE0Jekjhj6ktQRQ1+SOmLoS1JHDH1J6oihL0kdMfQlqSOGviR1xNCXpI4Y+pLUEUNfkjpi6EtSRwx9SeqIoS9JHVnWn0vszdz2OxdsP7jjuimPRJJG456+JHXE0Jekjhj6ktQRQ1+SOmLoS1JHDH1J6oihL0kdMfQlqSOGviR1xNCXpI4Y+pLUEUNfkjpi6EtSRwx9SeqIoS9JHTH0Jakjhr4kdcTQl6SO+OcSR+CfUZS00rinL0kdMfQlqSOnDf0ku5IcTXL/UNv5SfYmOdCeV7f2JLklyXyS+5JcNrTM1tb/QJKtkylHknQqS9nTfx+w+aS27cC+qtoA7GuvAa4BNrTHNuBWGGwkgJuA5wOXAzed2FBIkqbntKFfVZ8Cjp/UvAW4rU3fBrxsqP32GrgbOC/JBcDVwN6qOl5V3wD28uQNiSRpwpZ7TH9dVR1p018D1rXp9cCjQ/0OtbbF2p8kybYk+5PsP3bs2DKHJ0layMgncquqgBrDWE6sb2dVbaqqTWvXrh3XaiVJLD/0H2uHbWjPR1v7YeCioX4XtrbF2iVJU7Tc0N8NnLgCZyvw0aH217areK4AvtUOA90FXJVkdTuBe1VrkyRN0Wm/kZvkg8CVwJokhxhchbMDuCPJDcAjwCtb9z3AtcA88ARwPUBVHU/yVuCe1u/mqjr55LAkacJOG/pV9ZpFZr1kgb4F3LjIenYBu85odJKksfIbuZLUEUNfkjpi6EtSRwx9SeqIoS9JHTH0Jakjhr4kdcTQl6SOGPqS1BFDX5I6YuhLUkdOe+8dnbm57Xcu2H5wx3VTHokk/SD39CWpI4a+JHXE0Jekjhj6ktQRQ1+SOmLoS1JHDH1J6oihL0kdMfQlqSOGviR1xNCXpI4Y+pLUEUNfkjriXTanyLtvSpo19/QlqSOGviR1xNCXpI4Y+pLUEUNfkjpi6EtSRwx9SeqIoS9JHTH0Jakjhr4kdWSk0E9yMMkXk9ybZH9rOz/J3iQH2vPq1p4ktySZT3JfksvGUYAkaenGsaf/K1V1aVVtaq+3A/uqagOwr70GuAbY0B7bgFvH8N6SpDMwicM7W4Db2vRtwMuG2m+vgbuB85JcMIH3lyQtYtS7bBbw8SQFvKeqdgLrqupIm/81YF2bXg88OrTsodZ2ZKiNJNsY/CbAxRdfPOLwVgbvvilpWkYN/V+qqsNJfgzYm+Tfh2dWVbUNwpK1DcdOgE2bNp3RspKkUxvp8E5VHW7PR4GPAJcDj504bNOej7buh4GLhha/sLVJkqZk2aGf5EeSPOvENHAVcD+wG9jaum0FPtqmdwOvbVfxXAF8a+gwkCRpCkY5vLMO+EiSE+v5QFX9Q5J7gDuS3AA8Aryy9d8DXAvMA08A14/w3pKkZVh26FfVw8DPLND+n8BLFmgv4Mblvp8kaXR+I1eSOmLoS1JHDH1J6oihL0kdMfQlqSOjfiNXE+TtGSSNm3v6ktQRQ1+SOmLoS1JHDH1J6oihL0kdMfQlqSOGviR1xOv0V6DFrt8Hr+GXdGru6UtSRwx9SeqIoS9JHTH0Jakjhr4kdcTQl6SOeMnmU4y3Y5Z0Ku7pS1JHDH1J6oihL0kdMfQlqSOeyO2EJ3glgXv6ktQVQ1+SOmLoS1JHDH1J6ogncjvnCV6pL+7pS1JH3NPXgvwNQHpqck9fkjpi6EtSRzy8ozPiYR9pZTP0NRZuDKSVYeqhn2Qz8E5gFfCXVbVj2mPQ9LgxkM4uUw39JKuAdwMvBQ4B9yTZXVUPTnMcmj03BtJsTHtP/3JgvqoeBkjyIWALYOgLWHxjME5uWNSzaYf+euDRodeHgOcPd0iyDdjWXj6e5EsjvN8a4OsjLL8S9VbzGdebP57QSKant88YrPlMPWexGWfdidyq2gnsHMe6kuyvqk3jWNdK0VvNvdUL1tyLSdU87ev0DwMXDb2+sLVJkqZg2qF/D7AhySVJzgVeDeye8hgkqVtTPbxTVd9J8kbgLgaXbO6qqgcm+JZjOUy0wvRWc2/1gjX3YiI1p6omsV5J0lnIe+9IUkcMfUnqyIoM/SSbk3wpyXyS7QvMf06SfUnuS/LJJBcOzdua5EB7bJ3uyJdvuTUnuTTJp5M80Oa9avqjX55RPuc2/9lJDiV51/RGPZoRf7YvTvLxJA8leTDJ3DTHvlwj1vwn7Wf7oSS3JMl0R3/mkuxKcjTJ/YvMT6tlvtV82dC80fOrqlbUg8EJ4K8APwGcC3wB2HhSn78BtrbpFwPvb9PnAw+359VtevWsa5pwzc8FNrTpHweOAOfNuqZJ1jw0/53AB4B3zbqeadQMfBJ4aZt+JvDDs65pkjUDvwD8a1vHKuDTwJWzrmkJNf8ycBlw/yLzrwU+BgS4AvhMax9Lfq3EPf3v3cqhqv4XOHErh2EbgX9q058Ymn81sLeqjlfVN4C9wOYpjHlUy665qr5cVQfa9H8AR4G1Uxn1aEb5nEnyc8A64ONTGOu4LLvmJBuBc6pqL0BVPV5VT0xn2CMZ5XMu4IcYbCyeATwdeGziIx5RVX0KOH6KLluA22vgbuC8JBcwpvxaiaG/0K0c1p/U5wvAK9r0y4FnJfnRJS57Nhql5u9JcjmD/yBfmdA4x2nZNSd5GvB24M0TH+V4jfI5Pxf4ZpIPJ/l8kj9tNzg82y275qr6NIONwJH2uKuqHprweKdhsX+TseTXSgz9pXgz8KIknwdexOBbv9+d7ZAm7pQ1tz2F9wPXV9X/zWaIY7dYzW8A9lTVoVkObkIWq/kc4IVt/s8zOFzyuhmNcdwWrDnJTwE/zeCb/euBFyd54eyGuTKcdffeWYLT3sqhHcZ4BUCSZwK/XlXfTHIYuPKkZT85ycGOybJrbq+fDdwJvKX9urgSjPI5vwB4YZI3MDi2fW6Sx6vqSScJzzKj1HwIuLe+fwfbv2NwPPi90xj4CEap+XeAu6vq8TbvY8ALgH+exsAnaLF/k/Hk16xPaizjJMg5DE5gXML3T/w876Q+a4Cntem3ATcPnQj5KoOTIKvb9PmzrmnCNZ8L7APeNOs6plXzSX1ex8o5kTvK57yq9V/bXv8VcOOsa5pwza8C/rGt4+nt5/xXZ13TEuueY/ETudfxgydyP9vax5JfMy9+mf9g1wJfZnBs+i2t7Wbg19r0bwAHWp+/BJ4xtOxvA/Ptcf2sa5l0zcBvAt8G7h16XDrreib9OQ+tY8WE/qg1M/jjRPcBXwTeB5w763omWTODDd17gIcY/E2Od8y6liXW+0EG5yC+zeC4/A3A64HXt/lh8MemvtI+y01Dy46cX96GQZI68lQ9kStJWoChL0kdMfQlqSOGviR1xNCXpI4Y+pLUEUNfkjry/11JeywGTfuPAAAAAElFTkSuQmCC\n", "text/plain": [ "
" ] @@ -772,7 +909,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 18, "metadata": { "pycharm": { "is_executing": false @@ -781,7 +918,7 @@ "outputs": [ { "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXoAAAD8CAYAAAB5Pm/hAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAEIpJREFUeJzt3XuMpXV9x/H3h10u9cptS8guOLTStPQi0i3FWqtAbLlYl7aI2KYudNONERMba+q2/aOpqQm0qaixMd2IdTH1Qq0WolihC8ReBF3kDlUWCmG3CKsCLSW2Yr/94/yos+sMc2bOnDkzv32/kpPzPL/nOed8f/PsfM5vfs9zzqaqkCT164BJFyBJGi+DXpI6Z9BLUucMeknqnEEvSZ0z6CWpcwa9JHXOoJekzhn0ktS51ZMuAODII4+sqampSZchSSvKzTff/I2qWjPXfssi6KemptixY8eky5CkFSXJg8Ps59SNJHXOoJekzhn0ktQ5g16SOmfQS1LnDHpJ6pxBL0mdM+glqXMGvSR1bll8MnYUU1s+O2P7AxefvcSVSNLy5Ihekjpn0EtS5wx6SercUEGf5IEkdyS5NcmO1nZ4kmuT3NvuD2vtSfK+JDuT3J7kpHF2QJL07OYzoj+1qk6sqvVtfQuwvaqOB7a3dYAzgePbbTPwgcUqVpI0f6NM3WwAtrXlbcA509ovr4EbgUOTHD3C60iSRjBs0BdwTZKbk2xubUdV1cNt+evAUW15LfDQtMfuam17SbI5yY4kO/bs2bOA0iVJwxj2Ovqfr6rdSX4QuDbJv07fWFWVpObzwlW1FdgKsH79+nk9VpI0vKFG9FW1u90/CnwaOBl45JkpmXb/aNt9N3DMtIeva22SpAmYM+iTPDfJ859ZBn4RuBO4CtjYdtsIXNmWrwLe2K6+OQV4YtoUjyRpiQ0zdXMU8Okkz+z/0ar6+yRfBq5Isgl4EDiv7X81cBawE3gKuHDRq5YkDW3OoK+q+4GXzND+TeD0GdoLuGhRqpMkjcxPxkpS5wx6SeqcQS9JnTPoJalzBr0kdc6gl6TOGfSS1DmDXpI6Z9BLUucMeknqnEEvSZ0z6CWpcwa9JHXOoJekzhn0ktQ5g16SOmfQS1LnDHpJ6pxBL0mdM+glqXMGvSR1zqCXpM4Z9JLUOYNekjpn0EtS5wx6SeqcQS9JnTPoJalzBr0kdc6gl6TOGfSS1Lmhgz7JqiS3JPlMWz8uyU1Jdib5RJKDWvvBbX1n2z41ntIlScOYz4j+rcA909YvAS6tqhcDjwGbWvsm4LHWfmnbT5I0IUMFfZJ1wNnAB9t6gNOAT7ZdtgHntOUNbZ22/fS2vyRpAoYd0b8H+D3gf9v6EcDjVfV0W98FrG3La4GHANr2J9r+e0myOcmOJDv27NmzwPIlSXOZM+iTvAZ4tKpuXswXrqqtVbW+qtavWbNmMZ9akjTN6iH2eTnw2iRnAYcALwDeCxyaZHUbta8Ddrf9dwPHALuSrAZeCHxz0SuXJA1lzhF9Vf1+Va2rqingfOC6qvoN4Hrg3LbbRuDKtnxVW6dtv66qalGrliQNbZTr6N8BvC3JTgZz8Je19suAI1r724Ato5UoSRrFMFM3/6+qbgBuaMv3AyfPsM+3gdctQm2SpEXgJ2MlqXMGvSR1zqCXpM4Z9JLUOYNekjpn0EtS5wx6SeqcQS9JnTPoJalzBr0kdc6gl6TOGfSS1DmDXpI6Z9BLUucMeknqnEEvSZ0z6CWpcwa9JHXOoJekzhn0ktQ5g16SOmfQS1LnDHpJ6pxBL0mdM+glqXOrJ13AuExt+eyM7Q9cfPYSVyJJk+WIXpI6Z9BLUucMeknqnEEvSZ2bM+iTHJLkS0luS3JXkj9u7ccluSnJziSfSHJQaz+4re9s26fG2wVJ0rMZZkT/38BpVfUS4ETgjCSnAJcAl1bVi4HHgE1t/03AY6390rafJGlC5gz6GniyrR7YbgWcBnyytW8DzmnLG9o6bfvpSbJoFUuS5mWoOfokq5LcCjwKXAvcBzxeVU+3XXYBa9vyWuAhgLb9CeCIxSxakjS8oYK+qr5bVScC64CTgR8d9YWTbE6yI8mOPXv2jPp0kqRZzOuqm6p6HLgeeBlwaJJnPlm7DtjdlncDxwC07S8EvjnDc22tqvVVtX7NmjULLF+SNJdhrrpZk+TQtvwDwKuBexgE/rltt43AlW35qrZO235dVdViFi1JGt4w33VzNLAtySoGbwxXVNVnktwNfDzJnwC3AJe1/S8DPpJkJ/At4Pwx1C1JGtKcQV9VtwMvnaH9fgbz9fu2fxt43aJUJ0kamZ+MlaTOGfSS1DmDXpI6Z9BLUucMeknqnEEvSZ0z6CWpcwa9JHXOoJekzhn0ktQ5g16SOmfQS1LnDHpJ6pxBL0mdM+glqXPD/McjXZna8tlZtz1w8dlLWIkkLQ1H9JLUOYNekjpn0EtS5wx6SeqcQS9Jndvvrrp5NrNdkePVOJJWMkf0ktQ5g16SOmfQS1LnDHpJ6pxBL0mdM+glqXMGvSR1zqCXpM4Z9JLUuTmDPskxSa5PcneSu5K8tbUfnuTaJPe2+8Nae5K8L8nOJLcnOWncnZAkzW6YEf3TwO9W1QnAKcBFSU4AtgDbq+p4YHtbBzgTOL7dNgMfWPSqJUlDmzPoq+rhqvpKW/5P4B5gLbAB2NZ22wac05Y3AJfXwI3AoUmOXvTKJUlDmdccfZIp4KXATcBRVfVw2/R14Ki2vBZ4aNrDdrU2SdIEDB30SZ4H/C3wO1X1H9O3VVUBNZ8XTrI5yY4kO/bs2TOfh0qS5mGorylOciCDkP/rqvpUa34kydFV9XCbmnm0te8Gjpn28HWtbS9VtRXYCrB+/fp5vUlIEvjV4sMa5qqbAJcB91TVu6dtugrY2JY3AldOa39ju/rmFOCJaVM8kqQlNsyI/uXAbwJ3JLm1tf0BcDFwRZJNwIPAeW3b1cBZwE7gKeDCRa1YkjQvcwZ9Vf0TkFk2nz7D/gVcNGJdkqRF4idjJalzBr0kdc6gl6TOGfSS1DmDXpI6N9QHpvZ3fihDmqzZfgc1HEf0ktQ5g16SOufUjaTuON26N0f0ktQ5g16SOufUjaT9xv46peOIXpI6Z9BLUucMeknqnHP0kpYNPwE7Ho7oJalzBr0kdc6gl6TOOUcvaWz21+vWlxuDXtJ+r/c3JINe0pLz6pqlZdBLGsqzhXMvI99eeTJWkjpn0EtS55y6GUHvJ3CkYTnnvrw5opekzjmil7QXR+ff08tf7Y7oJalzBr0kdc6gl6TOzTlHn+RDwGuAR6vqJ1rb4cAngCngAeC8qnosSYD3AmcBTwEXVNVXxlP68tXLvJ6kPgwzov8wcMY+bVuA7VV1PLC9rQOcCRzfbpuBDyxOmZKkhZpzRF9VX0gytU/zBuBVbXkbcAPwjtZ+eVUVcGOSQ5McXVUPL1bB0v5usf5i9Oqa/cdC5+iPmhbeXweOastrgYem7bertUmSJmTk6+irqpLUfB+XZDOD6R2OPfbYUcuQNAtH7lroiP6RJEcDtPtHW/tu4Jhp+61rbd+nqrZW1fqqWr9mzZoFliFJmstCR/RXARuBi9v9ldPa35Lk48DPAk84Pz83r9KRNE7DXF75MQYnXo9Msgv4IwYBf0WSTcCDwHlt96sZXFq5k8HllReOoWapK77Ra9yGuermDbNsOn2GfQu4aNSiJDm3rsXjl5pJC+RIXCuFX4EgSZ1zRL8fcOS5tPx5a7kx6LVfMYS1PzLol5An1xZuUgG9ko7ZSqpVS8ugl1iakDSINSkG/TI27lHsswWPUxlSP7zqRpI654heYzXf6Yql+GtF2t8Y9JqRV6dI/TDoJWmeFjIQmuTgyaDXsjLfKRenaKS5GfSaF6d0pJXHoF+BlmPYOrKWli+DviOGraSZeB29JHXOoJekzhn0ktQ5g16SOmfQS1LnvOpGkhbJcr3yzRG9JHXOoJekzhn0ktQ5g16SOmfQS1LnDHpJ6pxBL0mdM+glqXMGvSR1zqCXpM6NJeiTnJHkq0l2JtkyjteQJA1n0YM+ySrgL4AzgROANyQ5YbFfR5I0nHGM6E8GdlbV/VX1P8DHgQ1jeB1J0hDGEfRrgYemre9qbZKkCZjY1xQn2QxsbqtPJvnqAp/qSOAbi1PVxNmX5aeXfoB9WZZyyUh9edEwO40j6HcDx0xbX9fa9lJVW4Gto75Ykh1VtX7U51kO7Mvy00s/wL4sV0vRl3FM3XwZOD7JcUkOAs4HrhrD60iShrDoI/qqejrJW4DPA6uAD1XVXYv9OpKk4Yxljr6qrgauHsdzz2Dk6Z9lxL4sP730A+zLcjX2vqSqxv0akqQJ8isQJKlzyzro5/oqhSQvSrI9ye1Jbkiybp/tL0iyK8n7l67q7zdKP5J8N8mt7Tbxk9oj9uXYJNckuSfJ3UmmlrL2fS20L0lOnXZMbk3y7STnLH0P9qp1lOPyp0nuasflfUmytNXvVeco/bgkyZ3t9vqlrfz7JflQkkeT3DnL9rSf987Wn5OmbduY5N522zhyMVW1LG8MTuTeB/wQcBBwG3DCPvv8DbCxLZ8GfGSf7e8FPgq8f6X2A3hy0sdiEftyA/Dqtvw84DkrtS/T9jkc+NZK7Qvwc8A/t+dYBXwReNUK7MfZwLUMzjs+l8HVfy+Y1DFpNf0CcBJw5yzbzwI+BwQ4Bbhp2r+p+9v9YW35sFFqWc4j+mG+SuEE4Lq2fP307Ul+GjgKuGYJan02I/VjmVlwX9r3Ha2uqmsBqurJqnpqacqe0WIdl3OBz63gvhRwCINgPRg4EHhk7BXPbJR+nAB8oaqerqr/Am4HzliCmmdVVV9gMAiYzQbg8hq4ETg0ydHALwHXVtW3quoxBm9gI/VlOQf9MF+lcBvwq235V4DnJzkiyQHAnwNvH3uVc1twP9r6IUl2JLlx0tMDjNaXHwEeT/KpJLck+bP2BXiTMupxecb5wMfGUuHwFtyXqvoig8B8uN0+X1X3jLne2YxyTG4DzkjynCRHAqey9wc3l6PZ+rvoXyOznIN+GG8HXpnkFuCVDD6B+13gzcDVVbVrksXNw2z9AHhRDT419+vAe5L88IRqHNZsfVkNvKJt/xkGf55fMKEah/Vsx4U2+vpJBp8ZWe5m7EuSFwM/xuAT7GuB05K8YnJlzmnGflTVNQwu6f4XBm+8X2TasdrfTey7boYw51cpVNW/097dkzwP+LWqejzJy4BXJHkzg7ngg5I8WVWT+G78Bfejbdvd7u9PcgPwUgbzmJMwyjHZBdxaVfe3bX/HYF7ysqUofAYjHZfmPODTVfWdMdc6l1GOy28DN1bVk23b54CXAf+4FIXvY9TflXcB72rbPgp8bQlqHsVs/d0NvGqf9htGeqVJnqyY40TGagYnIY7jeydmfnyffY4EDmjL7wLeOcPzXMBkT8YuuB8MTsQcPG2fe9nn5NQK6suqtv+atv5XwEUrsS/Ttt8InDqpPizScXk98A/tOQ4EtgO/vAL7sQo4oi3/FHAng3NCkz42U8x+MvZs9j4Z+6XWfjjwb+33/7C2fPhIdUz6BzHHD+ksBu/K9wF/2NreCby2LZ/bwu9rwAefCcV9nuMCJhj0o/SDwRURd7R/8HcAm1byMQFezeAk2R3Ah4GDVnBfphiMvA6Y9DEZ8d/YKuAvgXuAu4F3r9B+HNLqv5vBG/CJy+CYfIzBeY/vMJhn3wS8CXhT2x4G/0nTfe13Yv20x/4WsLPdLhy1Fj8ZK0mdW+knYyVJczDoJalzBr0kdc6gl6TOGfSS1DmDXpI6Z9BLUucMeknq3P8BpdqoH5C0KWEAAAAASUVORK5CYII=\n", + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAD4CAYAAAAXUaZHAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAARWklEQVR4nO3df4zkdX3H8eeL41cVW0C25DzApYppz7ai3VKstfIjVoS2h61FbKKHkp5GTGqiSdH+obUlwbZiaWxJz0IFIyitWkjFCqLE2gp4KPKz6oFHuOsJp4BKjVbw3T/me2FYdm9md3Zmdz88H8lkv/P5fGfm/dnZe+1nP9/vfC9VhSSpLXstdwGSpKVnuEtSgwx3SWqQ4S5JDTLcJalBey93AQCHHHJITU9PL3cZkrSq3HTTTd+uqqm5+lZEuE9PT7Nly5blLkOSVpUk98zX57KMJDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1aEV8QnUU02d/ct6+beeeMsFKJGnlcOYuSQ0y3CWpQQPDPcn+SW5M8tUktyf5s679yCQ3JNma5KNJ9u3a9+vub+36p8c7BEnSbMPM3H8EnFBVzwOOBk5KcizwHuB9VfVs4EHgzG7/M4EHu/b3dftJkiZoYLhXz8Pd3X26WwEnAP/StV8MnNptb+ju0/WfmCRLVrEkaaCh1tyTrElyM3A/cA1wF/BQVT3S7bIdWNdtrwPuBej6vws8fY7n3JRkS5Itu3btGm0UkqTHGSrcq+rRqjoaOAw4Bvj5UV+4qjZX1UxVzUxNzfkfiUiSFmlBZ8tU1UPA54AXAgcm2X2e/GHAjm57B3A4QNf/M8B3lqRaSdJQhjlbZirJgd32TwEvBe6kF/Kv7HbbCFzRbV/Z3afr/2xV1VIWLUnas2E+oboWuDjJGnq/DC6vqn9LcgfwkSR/AXwFuLDb/0LgQ0m2Ag8Ap4+hbknSHgwM96q6BXj+HO1301t/n93+Q+APlqQ6SdKi+AlVSWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQcNcOGzVmj77k3O2bzv3lAlXIkmT5cxdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQQPDPcnhST6X5I4ktyf54679XUl2JLm5u53c95i3J9ma5GtJXjbOAUiSnmiYS/4+Ary1qr6c5GnATUmu6freV1V/3b9zkvXA6cBzgWcAn0nynKp6dCkLlyTNb+DMvap2VtWXu+3vA3cC6/bwkA3AR6rqR1X1TWArcMxSFCtJGs6C1tyTTAPPB27omt6c5JYkFyU5qGtbB9zb97DtzPHLIMmmJFuSbNm1a9eCC5ckzW/ocE9yAPAx4C1V9T3gAuBZwNHATuC9C3nhqtpcVTNVNTM1NbWQh0qSBhgq3JPsQy/YP1xVHweoqvuq6tGq+gnwAR5betkBHN738MO6NknShAxztkyAC4E7q+q8vva1fbu9Arit274SOD3JfkmOBI4Cbly6kiVJgwxztsyLgNcAtya5uWt7B/DqJEcDBWwD3gBQVbcnuRy4g96ZNmd5powkTdbAcK+qLwCZo+uqPTzmHOCcEeqSJI3AT6hKUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIatPegHZIcDlwCHAoUsLmqzk9yMPBRYBrYBpxWVQ8mCXA+cDLwA+CMqvryeMpfnOmzPzln+7ZzT5lwJZI0HsPM3B8B3lpV64FjgbOSrAfOBq6tqqOAa7v7AC8Hjupum4ALlrxqSdIeDQz3qtq5e+ZdVd8H7gTWARuAi7vdLgZO7bY3AJdUz/XAgUnWLnnlkqR5LWjNPck08HzgBuDQqtrZdX2L3rIN9IL/3r6Hbe/aZj/XpiRbkmzZtWvXAsuWJO3J0OGe5ADgY8Bbqup7/X1VVfTW44dWVZuraqaqZqamphbyUEnSAEOFe5J96AX7h6vq413zfbuXW7qv93ftO4DD+x5+WNcmSZqQgeHenf1yIXBnVZ3X13UlsLHb3ghc0df+2vQcC3y3b/lGkjQBA0+FBF4EvAa4NcnNXds7gHOBy5OcCdwDnNb1XUXvNMit9E6FfN2SVixJGmhguFfVF4DM033iHPsXcNaIdUmSRuAnVCWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUoGGuCilJK5L/2f38nLlLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIs2X6eORdUiucuUtSg5y5S2rOfH+Fz6fFv86duUtSg5y5S1rxFjoTl+EuaQUxxJfOwGWZJBcluT/JbX1t70qyI8nN3e3kvr63J9ma5GtJXjauwiVJ8xtmzf2DwElztL+vqo7ublcBJFkPnA48t3vM3ydZs1TFSpKGMzDcq+rzwANDPt8G4CNV9aOq+iawFThmhPokSYswytkyb05yS7dsc1DXtg64t2+f7V2bJGmCFntA9QLgz4Hqvr4XeP1CniDJJmATwBFHHLHIMiStRh44Hb9Fzdyr6r6qerSqfgJ8gMeWXnYAh/fteljXNtdzbK6qmaqamZqaWkwZkqR5LGrmnmRtVe3s7r4C2H0mzZXApUnOA54BHAXcOHKVkjRGLV5XamC4J7kMOA44JMl24J3AcUmOprcssw14A0BV3Z7kcuAO4BHgrKp6dDylS5LmMzDcq+rVczRfuIf9zwHOGaUoSdJovLaMJDXIcJekBhnuktQgw12SGmS4S1KDvOSvpLHxk6jLx5m7JDXIcJekBhnuktQgw12SGmS4S1KDPFtmCC1eMU5S25y5S1KDnLlL0jz2dJ7+Sv/L3XCX9DirOdD0GMNd0sj8JOrK45q7JDXIcJekBhnuktQg19wlDc219dXDcJeepAzqtrksI0kNMtwlqUEuy4zAa85IWqmcuUtSgwbO3JNcBPw2cH9V/WLXdjDwUWAa2AacVlUPJglwPnAy8APgjKr68nhKlzQMD5w+OQ0zc/8gcNKstrOBa6vqKODa7j7Ay4Gjutsm4IKlKVOStBADZ+5V9fkk07OaNwDHddsXA9cBf9K1X1JVBVyf5MAka6tq51IVLD3ZeaxHw1jsmvuhfYH9LeDQbnsdcG/fftu7tidIsinJliRbdu3atcgyJElzGfmAajdLr0U8bnNVzVTVzNTU1KhlSJL6LDbc70uyFqD7en/XvgM4vG+/w7o2SdIELTbcrwQ2dtsbgSv62l+bnmOB77reLkmTN8ypkJfRO3h6SJLtwDuBc4HLk5wJ3AOc1u1+Fb3TILfSOxXydWOoWZI0wDBny7x6nq4T59i3gLNGLUqSNBo/oSpJDTLcJalBXjhMWiQ/TKSVzHCXBlgt12ZZLXVqMlyWkaQGOXMfA/9cl7TcDHdpifnLXSuB4S4tM9fKNQ6uuUtSg5y5a6zGvUThEog0N8N9BTCgJC01w12aENfWNUmuuUtSg5y5a0VxiUpaGoa7nlT2tDTiLxC1xHBfwZzFTpZr4mqJ4S5JEzDpyZrhPkHODB8z7u+F32s92Xm2jCQ1yJm75rTS1vudiUsL48xdkhrkzP1JwNP/pKW30v66nc2ZuyQ1yJn7k9xSrWW7Ji6tLCOFe5JtwPeBR4FHqmomycHAR4FpYBtwWlU9OFqZWikMcWl1WIplmeOr6uiqmununw1cW1VHAdd29yVJEzSONfcNwMXd9sXAqWN4DUnSHowa7gVcneSmJJu6tkOrame3/S3g0LkemGRTki1JtuzatWvEMiRJ/UY9oPobVbUjyc8C1yT57/7OqqokNdcDq2ozsBlgZmZmzn20MK6HS9ptpHCvqh3d1/uTfAI4Brgvydqq2plkLXD/EtSpPoa4pEEWvSyT5KlJnrZ7G/gt4DbgSmBjt9tG4IpRi5QkLcwoM/dDgU8k2f08l1bVvyf5EnB5kjOBe4DTRi9TkrQQiw73qrobeN4c7d8BThylKEnSaLz8gCQ1yHCXpAYZ7pLUIC8cJklLaKWcquzMXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBYwv3JCcl+VqSrUnOHtfrSJKeaCzhnmQN8HfAy4H1wKuTrB/Ha0mSnmhcM/djgK1VdXdV/R/wEWDDmF5LkjTL3mN63nXAvX33twO/1r9Dkk3Apu7uw0m+tsjXOgT49iIfu9I4lpWplbG0Mg5oaCx5z0hjeeZ8HeMK94GqajOwedTnSbKlqmaWoKRl51hWplbG0so4wLEMY1zLMjuAw/vuH9a1SZImYFzh/iXgqCRHJtkXOB24ckyvJUmaZSzLMlX1SJI3A58G1gAXVdXt43gtlmBpZwVxLCtTK2NpZRzgWAZKVY3jeSVJy8hPqEpSgwx3SWrQig73QZcwSPLMJNcmuSXJdUkOm9X/00m2J3n/5Kqe2yhjSfJokpu727IemB5xHEckuTrJnUnuSDI9ydpnW+xYkhzf937cnOSHSU6d/AgeV+so78tfJrm9e1/+NkkmW/0Tah1lLO9Jclt3e9VkK39CnRcluT/JbfP0p/t+b+3G8oK+vo1JvtHdNi6qgKpakTd6B2LvAn4O2Bf4KrB+1j7/DGzstk8APjSr/3zgUuD9q3kswMPL/X4s0TiuA17abR8APGW1jqVvn4OBB1brWIBfB/6ze441wBeB41bpWE4BrqF3oshT6Z2199PLOJbfBF4A3DZP/8nAp4AAxwI39P1M3d19PajbPmihr7+SZ+7DXMJgPfDZbvtz/f1JfgU4FLh6ArUOMtJYVpBFj6O7ttDeVXUNQFU9XFU/mEzZc1qq9+SVwKdW8VgK2J9ekO4H7APcN/aK5zfKWNYDn6+qR6rqf4FbgJMmUPOcqurz9H7xz2cDcEn1XA8cmGQt8DLgmqp6oKoepPcLa8HjWMnhPtclDNbN2uerwO91268Anpbk6Un2At4LvG3sVQ5n0WPp7u+fZEuS65f5z/9RxvEc4KEkH0/ylSR/1V1gbrmM+p7sdjpw2VgqHN6ix1JVX6QXkDu726er6s4x17sno7wvXwVOSvKUJIcAx/P4D1OuNPONdZjvwUArOdyH8TbgJUm+AryE3qdgHwXeBFxVVduXs7gFmm8sAM+s3seT/xD4myTPWqYahzHfOPYGXtz1/yq9P7vPWKYah7Wn94RulvVL9D7PsdLNOZYkzwZ+gd6nyNcBJyR58fKVOZQ5x1JVVwNXAf9F7xfuF+l7v55slu3aMkMYeAmDqvofut/gSQ4Afr+qHkryQuDFSd5Eb2133yQPV9VyXVd+0WPp+nZ0X+9Och3wfHrrkpM2ynuyHbi5qu7u+v6V3jrjhZMofA4jvSed04BPVNWPx1zrIKO8L38EXF9VD3d9nwJeCPzHJAqfw6j/Vs4Bzun6LgW+PoGaF2u+se4AjpvVft2Cn325DjYMcTBib3oHEo7ksQMrz521zyHAXt32OcC753ieM1j+A6qLHgu9Ayr79e3zDWYdYFol41jT7T/V3f8n4KzV+J709V8PHL+cP1tL8L68CvhM9xz7ANcCv7NKx7IGeHq3/cvAbfSO8yznezPN/AdUT+HxB1Rv7NoPBr7Z/ds/qNs+eMGvvdw/mAO+MSfT+817F/CnXdu7gd/ttl/Zhd3XgX/cHYKznuMMljncRxkLvbMZbu1+yG8FzlyN4+j6XkrvINetwAeBfVfxWKbpzbD2Wu6frRF/vtYA/wDcCdwBnLeKx7J/N4Y76P3iPXqZx3EZveMYP6a3bn4m8EbgjV1/6P2nRnd1/yZm+h77emBrd3vdYl7fyw9IUoNW+wFVSdIcDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUoP8H1xfysAfPXP0AAAAASUVORK5CYII=\n", "text/plain": [ "
" ] @@ -807,7 +944,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 19, "metadata": { "pycharm": { "is_executing": false @@ -832,7 +969,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 20, "metadata": { "pycharm": { "is_executing": false @@ -861,7 +998,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 21, "metadata": { "pycharm": { "is_executing": false @@ -873,16 +1010,16 @@ "output_type": "stream", "text": [ "Proportion of exact matches for each field using threshold: 0.999\n", - "given_name 0.93\n", - "surname 0.96\n", - "street_number 0.88\n", - "address_1 0.92\n", - "address_2 0.80\n", - "suburb 0.92\n", - "postcode 0.95\n", + "given_name 0.95\n", + "surname 0.94\n", + "street_number 0.85\n", + "address_1 0.93\n", + "address_2 0.75\n", + "suburb 0.95\n", + "postcode 0.97\n", "state 1.00\n", - "date_of_birth 0.96\n", - "soc_sec_id 0.40\n", + "date_of_birth 0.98\n", + "soc_sec_id 0.38\n", "dtype: float64\n" ] } @@ -902,7 +1039,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 22, "metadata": { "pycharm": { "is_executing": false @@ -914,15 +1051,15 @@ "output_type": "stream", "text": [ "Proportion of exact matches for each field using threshold: 0.95\n", - "given_name 0.49\n", - "surname 0.57\n", - "street_number 0.81\n", - "address_1 0.55\n", - "address_2 0.44\n", - "suburb 0.70\n", - "postcode 0.84\n", - "state 0.93\n", - "date_of_birth 0.84\n", + "given_name 0.58\n", + "surname 0.59\n", + "street_number 0.73\n", + "address_1 0.67\n", + "address_2 0.53\n", + "suburb 0.71\n", + "postcode 0.89\n", + "state 0.95\n", + "date_of_birth 0.75\n", "soc_sec_id 0.92\n", "dtype: float64\n" ] @@ -931,31 +1068,6 @@ "source": [ "look_at_per_field_accuracy(threshold = 0.95, num_samples = 100)" ] - }, - { - "cell_type": "code", - "execution_count": 26, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'0.12.0'" - ] - }, - "execution_count": 26, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { @@ -974,7 +1086,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.0" + "version": "3.7.3" } }, "nbformat": 4, diff --git a/docs/tutorial/data/schema.json b/docs/tutorial/data/schema.json index f1153148..fbeebea5 100644 --- a/docs/tutorial/data/schema.json +++ b/docs/tutorial/data/schema.json @@ -1,6 +1,5 @@ - { - "version": 2, + "version": 3, "clkConfig": { "l": 1024, "kdf": { @@ -23,10 +22,17 @@ "encoding": "utf-8" }, "hashing": { - "ngram": 2, - "positional": false, - "hash": {"type": "doubleHash"}, - "strategy": {"k": 15} + "hash": { + "type": "doubleHash" + }, + "strategy": { + "bitsPerToken": 15 + }, + "comparison": { + "type": "ngram", + "n": 2, + "positional": false + } } }, { @@ -36,10 +42,17 @@ "encoding": "utf-8" }, "hashing": { - "ngram": 2, - "positional": false, - "hash": {"type": "doubleHash"}, - "strategy": {"k": 15} + "hash": { + "type": "doubleHash" + }, + "strategy": { + "bitsPerToken": 15 + }, + "comparison": { + "type": "ngram", + "n": 2, + "positional": false + } } }, { @@ -49,10 +62,17 @@ "encoding": "utf-8" }, "hashing": { - "ngram": 2, - "positional": true, - "hash": {"type": "doubleHash"}, - "strategy": {"k": 15} + "hash": { + "type": "doubleHash" + }, + "strategy": { + "bitsPerToken": 15 + }, + "comparison": { + "type": "ngram", + "n": 2, + "positional": true + } } }, { @@ -62,10 +82,17 @@ "encoding": "utf-8" }, "hashing": { - "ngram": 1, - "positional": true, - "hash": {"type": "doubleHash"}, - "strategy": {"k": 15} + "hash": { + "type": "doubleHash" + }, + "strategy": { + "bitsPerToken": 15 + }, + "comparison": { + "type": "ngram", + "n": 1, + "positional": true + } } }, { @@ -75,10 +102,17 @@ "encoding": "utf-8" }, "hashing": { - "ngram": 2, - "positional": false, - "hash": {"type": "doubleHash"}, - "strategy": {"k": 15} + "hash": { + "type": "doubleHash" + }, + "strategy": { + "bitsPerToken": 15 + }, + "comparison": { + "type": "ngram", + "n": 2, + "positional": false + } } }, { @@ -88,10 +122,17 @@ "encoding": "utf-8" }, "hashing": { - "ngram": 1, - "positional": true, - "hash": {"type": "doubleHash"}, - "strategy": {"k": 7} + "hash": { + "type": "doubleHash" + }, + "strategy": { + "bitsPerToken": 7 + }, + "comparison": { + "type": "ngram", + "n": 1, + "positional": true + } } }, { @@ -101,11 +142,18 @@ "encoding": "utf-8" }, "hashing": { - "ngram": 1, - "positional": true, - "hash": {"type": "doubleHash"}, - "strategy": {"k": 7} + "hash": { + "type": "doubleHash" + }, + "strategy": { + "bitsPerToken": 7 + }, + "comparison": { + "type": "ngram", + "n": 1, + "positional": true + } } } ] -} +} \ No newline at end of file diff --git a/docs/tutorial/data/schema_ABC.json b/docs/tutorial/data/schema_ABC.json index 470e2bb4..612f6dcf 100644 --- a/docs/tutorial/data/schema_ABC.json +++ b/docs/tutorial/data/schema_ABC.json @@ -1,6 +1,5 @@ - { - "version": 2, + "version": 3, "clkConfig": { "l": 1024, "kdf": { @@ -23,9 +22,14 @@ "encoding": "utf-8" }, "hashing": { - "ngram": 2, - "positional": false, - "strategy": {"k": 15} + "strategy": { + "bitsPerToken": 15 + }, + "comparison": { + "type": "ngram", + "n": 2, + "positional": false + } } }, { @@ -35,9 +39,14 @@ "encoding": "utf-8" }, "hashing": { - "ngram": 2, - "positional": false, - "strategy": {"k": 15} + "strategy": { + "bitsPerToken": 15 + }, + "comparison": { + "type": "ngram", + "n": 2, + "positional": false + } } }, { @@ -47,9 +56,14 @@ "encoding": "utf-8" }, "hashing": { - "ngram": 2, - "positional": true, - "strategy": {"k": 15} + "strategy": { + "bitsPerToken": 15 + }, + "comparison": { + "type": "ngram", + "n": 2, + "positional": true + } } }, { @@ -59,9 +73,14 @@ "encoding": "utf-8" }, "hashing": { - "ngram": 1, - "positional": true, - "strategy": {"k": 8} + "strategy": { + "bitsPerToken": 8 + }, + "comparison": { + "type": "ngram", + "n": 1, + "positional": true + } } }, { @@ -69,4 +88,4 @@ "ignored": true } ] -} +} \ No newline at end of file diff --git a/docs/tutorial/multiparty-linkage-in-entity-service.ipynb b/docs/tutorial/multiparty-linkage-in-entity-service.ipynb index 7f9504a3..a5a5e5f6 100644 --- a/docs/tutorial/multiparty-linkage-in-entity-service.ipynb +++ b/docs/tutorial/multiparty-linkage-in-entity-service.ipynb @@ -52,8 +52,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'project_count': 10, 'rate': 20496894, 'status': 'ok'}\n", - "{'anonlink': '0.11.2', 'entityservice': 'v1.11.0', 'python': '3.6.8'}\n" + "{'project_count': 5944, 'rate': 2260983, 'status': 'ok'}\n", + "{'anonlink': '0.12.5', 'entityservice': 'v1.13.0-alpha', 'python': '3.7.5'}\n" ] } ], @@ -87,11 +87,11 @@ "name": "stdout", "output_type": "stream", "text": [ - "project_id: 8eeb1050f5add8f78ff4a0da04219fead48f22220fb0f15e\n", + "project_id: 21d8916332764c00c0861f1dda132c633c731c377fd89696\n", "\n", - "result_token: c8f22b577aac9432871eeea02cbe504d399a9776add1de9f\n", + "result_token: 4b8c53796161aad56414631fd553d5905256ea5cba0476e8\n", "\n", - "update_tokens: ['6bf0f1c84c17116eb9f93cf8a4cfcb13d49d288a1f376dd8', '4b9265070849af1f0546f2adaeaa85a7d0e60b10f9b4afbc', '3ff03cadd750ce1b40cc4ec2b99db0132f62d8687328eeb9', 'c1b562ece6bbef6cd1a0541301bb1f82bd697bce04736296', '8cfdebbe12c65ae2ff20fd0c0ad5de4feb06c9a9dd1209c8']\n" + "update_tokens: ['f3dafb72996cbc0f453f2acde9dd0e037066039d492c96ee', '28c6cb8b3f85bb528574d51c1f67953af7bb9b835b119451', '028b0b1c05b1e669c7b5bf13caf3a53022481d867c3c0fb9', '105c8d242b51f30388f6f8b0bd4d32189127ea760d22377e', '36955c914e3e0d1aed86a5af32027dfb8a8169532ba4125e']\n" ] } ], @@ -143,27 +143,27 @@ "text": [ "Data provider 1: {\n", " \"message\": \"Updated\",\n", - " \"receipt_token\": \"c7d9ba71260863f13af55e12603f8694c29e935262b15687\"\n", + " \"receipt_token\": \"3e102ce587ae97feb18aebf7596aee5ba3ba5b6a41d5bedf\"\n", "}\n", "\n", "Data provider 2: {\n", " \"message\": \"Updated\",\n", - " \"receipt_token\": \"70e4ed1b403c4e628183f82548a9297f8417ca3de94648bf\"\n", + " \"receipt_token\": \"ab758b30126ddc083bf65749773fc5856719b4273adc0703\"\n", "}\n", "\n", "Data provider 3: {\n", " \"message\": \"Updated\",\n", - " \"receipt_token\": \"b56fe568b93dc4522444e503078e16c18573adecbc086b6a\"\n", + " \"receipt_token\": \"e013c252746cbc5ceb00b4009500769ceb63389de886137c\"\n", "}\n", "\n", "Data provider 4: {\n", " \"message\": \"Updated\",\n", - " \"receipt_token\": \"7e3c80e554cfde23847d9aa2cff1323aa8f411e4033c0562\"\n", + " \"receipt_token\": \"f2f38a3206197dd46b53c4c6da079527552d7c6e24b9b63e\"\n", "}\n", "\n", "Data provider 5: {\n", " \"message\": \"Updated\",\n", - " \"receipt_token\": \"8bde91367ee52b5c6804d5ce2d2d3350ce3c3766b8625bbc\"\n", + " \"receipt_token\": \"e489cf14d65b211dd6c8b98b1a902f04e3b09c0e3da21a44\"\n", "}\n", "\n" ] @@ -237,15 +237,15 @@ { "data": { "text/plain": [ - "{'current_stage': {'description': 'waiting for CLKs',\n", - " 'number': 1,\n", - " 'progress': {'absolute': 5,\n", - " 'description': 'number of parties already contributed',\n", - " 'relative': 1.0}},\n", + "{'current_stage': {'description': 'compute similarity scores',\n", + " 'number': 2,\n", + " 'progress': {'absolute': 31440720,\n", + " 'description': 'number of already computed similarity scores',\n", + " 'relative': 0.2984721650891483}},\n", " 'stages': 3,\n", - " 'state': 'queued',\n", - " 'time_added': '2019-06-23T11:17:27.646642+00:00',\n", - " 'time_started': None}" + " 'state': 'running',\n", + " 'time_added': '2019-11-18T02:52:30.352381+00:00',\n", + " 'time_started': '2019-11-18T02:52:30.373760+00:00'}" ] }, "execution_count": 6, @@ -285,12 +285,13 @@ } ], "source": [ - "import clkhash.rest_client\n", "from IPython.display import clear_output\n", - "\n", - "for update in clkhash.rest_client.watch_run_status(SERVER, project_id, run_id, result_token, timeout=30):\n", + "from clkhash.rest_client import RestClient\n", + "from clkhash.rest_client import format_run_status\n", + "rest_client = RestClient(SERVER)\n", + "for update in rest_client.watch_run_status(project_id, run_id, result_token, timeout=300):\n", " clear_output(wait=True)\n", - " print(clkhash.rest_client.format_run_status(update))\n" + " print(format_run_status(update))" ] }, { @@ -315,26 +316,26 @@ { "data": { "text/plain": [ - "[[[0, 3127], [3, 3145], [2, 3152], [1, 3143]],\n", - " [[2, 1653], [3, 1655], [1, 1632], [0, 1673], [4, 1682]],\n", - " [[0, 2726], [1, 2737], [3, 2735]],\n", - " [[1, 837], [3, 864]],\n", - " [[0, 1667], [4, 1676], [1, 1624], [3, 1646]],\n", - " [[1, 1884], [2, 1911], [4, 1926], [0, 1916]],\n", - " [[0, 192], [2, 198]],\n", - " [[3, 328], [4, 330], [0, 350], [2, 351], [1, 345]],\n", - " [[2, 3173], [4, 3176], [3, 3163], [0, 3145], [1, 3161]],\n", - " [[1, 347], [4, 332], [2, 353], [0, 352]],\n", - " [[1, 736], [3, 761], [2, 768], [0, 751], [4, 754]],\n", - " [[1, 342], [2, 349]],\n", - " [[3, 899], [2, 913]],\n", - " [[1, 465], [3, 477]],\n", - " [[0, 285], [1, 293]],\n", - " [[0, 785], [3, 794]],\n", - " [[3, 2394], [4, 2395], [0, 2395]],\n", - " [[1, 1260], [2, 1311], [3, 1281], [4, 1326]],\n", - " [[0, 656], [2, 663]],\n", - " [[1, 2468], [2, 2479]]]" + "[[[0, 287], [2, 293], [4, 277]],\n", + " [[0, 2387], [1, 2386]],\n", + " [[0, 264], [3, 252], [1, 272]],\n", + " [[0, 2496], [4, 2498]],\n", + " [[3, 147], [4, 147]],\n", + " [[3, 815], [4, 812]],\n", + " [[3, 1302], [4, 1343]],\n", + " [[0, 1691], [3, 1674]],\n", + " [[0, 3085], [3, 3117]],\n", + " [[1, 2559], [4, 2545]],\n", + " [[0, 574], [3, 576], [4, 554]],\n", + " [[0, 424], [4, 387]],\n", + " [[1, 1087], [2, 1140]],\n", + " [[1, 468], [2, 489], [3, 482], [4, 469]],\n", + " [[3, 2102], [4, 2115]],\n", + " [[1, 981], [3, 1007]],\n", + " [[0, 696], [3, 704]],\n", + " [[0, 2475], [2, 2501], [1, 2485]],\n", + " [[1, 1034], [2, 1090]],\n", + " [[0, 2785], [4, 2797]]]" ] }, "execution_count": 8, @@ -362,7 +363,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 9, "metadata": { "pycharm": {} }, @@ -371,89 +372,71 @@ "name": "stdout", "output_type": "stream", "text": [ - "0 ['samual', 'mason', '05-12-1917', 'male', 'pertb', '405808.756', '07 2284 3649']\n", - "3 ['samuAl', 'mason', '05-12-1917', 'male', 'peryh', '4058o8.756', '07 2274 3549']\n", - "2 ['samie', 'mazon', '05-12-1917', 'male', '', '405898.756', '07 2275 3649']\n", - "1 ['zamusl', 'mason', '05-12-2917', 'male', '', '405898.756', '07 2274 2649']\n", + "0 ['mackenzie', 'tremellen', '11-01-2947', 'maoe', 'melbourne', '79469.112', '']\n", + "2 ['mackenzie', 'dremellen', '11-01-2937', 'mals', 'mceloburne', '70469.122', '07 5988 5208']\n", + "4 ['macckenzie', 'tremellen', '', 'malr', 'melbovrne', '70469.122', '07 5988 5208']\n", "\n", - "2 ['thomas', 'burfrod', '08-04-1999', '', 'pertj', '182174.209', '02 3881 9666']\n", - "3 ['thomas', 'burfrod', '09-04-1999', 'male', '', '182174.209', '02 3881 9666']\n", - "1 ['thomas', 'burford', '08-04-19o9', 'mal4', '', '182175.109', '02 3881 9666']\n", - "0 ['thomas', 'burford', '08-04-1999', 'male', 'perth', '182174.109', '02 3881 9666']\n", - "4 ['thomas', 'burf0rd', '08-04-q999', 'mske', 'perrh', '182174.109', '02 3881 9666']\n", + "0 ['sophi', 'couljon', '12-03-1841', 'female', 'sydney', '80972.256', '04 3854 3784']\n", + "1 ['sophie', 'coulson', '12-03-1941', 'female', 'sydney', '80972.356', '04 3854 3784']\n", "\n", - "0 ['kaitlin', 'bondza', '03-08-1961', 'male', 'sydney', '41168.999', '02 4632 1380']\n", - "1 ['kaitlin', 'bondja', '03-08-1961', 'malr', 'sydmey', '41168.999', '02 4632 1370']\n", - "3 [\"k'latlin\", 'bonklza', '03-08-1961', 'male', 'sydaney', '', '02 4632 1380']\n", + "0 ['jasmine', 'clarke', '04-00-2009', 'maje', 'melb0urme', '99853.100', '02 1507 1520']\n", + "3 ['jasmine', 'clarke', '04-09-2009', 'male', 'melbourne', '99853.200', '02 1507 1520']\n", + "1 ['jasminr', 'klarle', '04-99-2009', 'male', 'melbourne', '99863.200', '02 1507 1520']\n", "\n", - "1 ['chr8stian', 'jolly', '22-08-2009', 'male', '', '178371.991', '04 5868 7703']\n", - "3 ['chr8stian', 'jolly', '22-09-2099', 'malr', 'melbokurne', '178271.991', '04 5868 7703']\n", + "0 ['zoel', 'ev', '06-09-1990', 'gemale', 'ysdnvvy', '183366.696', '02 5578 4520']\n", + "4 ['joel', 'everett', '06-09-1990', 'female', 'sydney', '183366.696', '02 5578 4520']\n", "\n", - "0 ['oaklrigh', 'ngvyen', '24-07-1907', 'mslr', 'sydney', '63175.398', '04 9019 6235']\n", - "4 ['oakleith', 'ngvyen', '24-97-1907', 'male', 'sydiney', '63175.498', '04 9019 6235']\n", - "1 ['oajleigh', 'ngryen', '24-07-1007', 'male', 'sydney', '63175.498', '04 9919 6235']\n", - "3 ['oakleigh', 'nguyrn', '34-07-1907', 'male', 'sbdeney', '63175.r98', '04 9019 6235']\n", + "3 ['katelyn', 'matthets', '23-07-1977', '', 'melbourne', '118010.996', '07 9265 9238']\n", + "4 ['kateyln', 'matth4ws', '23-07-1978', 'male', 'melbounre', '118010.996', '07 9265 9238']\n", "\n", - "1 ['georgia', 'nguyen', '06-11-1930', 'male', 'perth', '247847.799', '08 6560 4063']\n", - "2 ['georia', 'nfuyen', '06-11-1930', 'male', 'perrh', '247847.799', '08 6560 4963']\n", - "4 ['geortia', 'nguyea', '06-11-1930', 'male', 'pertb', '247847.798', '08 6560 4063']\n", - "0 ['egorgia', 'nguyqn', '06-11-1930', 'male', 'peryh', '247847.799', '08 6460 4963']\n", + "3 ['max', 'pontifex', '17-07-1930', 'male', 'melbourne', '42337.169', '04 8102 3785']\n", + "4 ['max', 'pontjef', '17-07-1930', 'male', 'melbovrne', '', '04 9102 3785']\n", "\n", - "0 ['connor', 'mcneill', '05-09-1902', 'male', 'sydney', '108473.824', '02 6419 9472']\n", - "2 ['connro', 'mcnell', '05-09-1902', 'male', 'sydnye', '108474.824', '02 6419 9472']\n", + "3 ['talrna', 'seilo', '06-09-1953', 'maoe', '', '55815.962', '03 8568 8024']\n", + "4 ['talezba', 'seib', '06-09-1953', 'male', '', '', '03 8567 8024']\n", "\n", - "3 ['alessandria', 'sherriff', '25-91-1951', 'male', 'melb0urne', '5224r.762', '03 3077 2019']\n", - "4 ['alessandria', 'sherriff', '25-01-1951', 'male', 'melbourne', '52245.762', '03 3077 1019']\n", - "0 ['alessandria', \"sherr'lff\", '25-01-1951', 'malr', 'melbourne', '', '03 3977 1019']\n", - "2 ['alessandria', 'shernff', '25-01-1051', 'mzlr', 'melbourne', '52245.663', '03 3077 1019']\n", - "1 ['alessandrya', 'sherrif', '25-01-1961', 'male', 'jkelbouurne', '52245.762', '03 3077 1019']\n", + "0 ['maddiaon', \"mel'ln\", '21-12-1945', 'male', 'melbouren', '', '02 1963 9316']\n", + "3 ['madklidon', 'meJi7|', '21-12-1945', 'maie', 'melbourne', '98312.180', '02 1964 9316']\n", "\n", - "2 ['harriyon', 'micyelmor', '21-04-1971', 'male', 'pert1>', '291889.942', '04 5633 5749']\n", - "4 ['harri5on', 'micyelkore', '21-04-1971', '', 'pertb', '291880.942', '04 5633 5749']\n", - "3 ['hariso17', 'micelmore', '21-04-1971', 'male', 'pertb', '291880.042', '04 5633 5749']\n", - "0 ['harrison', 'michelmore', '21-04-1981', 'malw', 'preth', '291880.942', '04 5643 5749']\n", - "1 ['harris0n', 'michelmoer', '21-04-1971', '', '', '291880.942', '04 5633 5749']\n", + "0 ['holly', 'reih', '22-06-2009', 'msle', 'syconey', '131184.582', '']\n", + "3 ['holly', 'reicl', '21-06-2009', 'male', 'sydey', '131184.582', '']\n", "\n", - "1 ['alannah', 'gully', '15-04-1903', 'make', 'meobourne', '134518.814', '04 5104 4572']\n", - "4 ['alana', 'gully', '15-04-1903', 'male', 'melbourne', '134518.814', '04 5104 4582']\n", - "2 ['alama', 'gulli', '15-04-1903', 'mald', 'melbourne', '134518.814', '04 5104 5582']\n", - "0 ['alsna', 'gullv', '15-04-1903', 'male', '', '134518.814', '04 5103 4582']\n", + "1 ['jessica', 'peteahsen', '30-07-1940', 'malr', 'mel1>oume', '173806.400', '04 7005 4927']\n", + "4 ['jes5ica', 'peter5en', '30-08-1040', 'male', 'melbourne', '173806.400', '04 7005 49q7']\n", "\n", - "1 ['sraah', 'bates-brownsword', '26-11-1905', 'malr', '', '59685.979', '03 8545 5584']\n", - "3 ['sarah', 'bates-brownswort', '26-11-1905', 'male', '', '59686.879', '03 8545 6584']\n", - "2 ['sara0>', 'bates-browjsword', '26-11-1905', 'male', '', '59685.879', '']\n", - "0 ['saran', 'bates-brownsvvord', '26-11-1905', 'malr', 'sydney', '59685.879', '03 8555 5584']\n", - "4 ['snrah', 'bates-bro2nsword', '26-11-1005', 'male', 'sydney', '58685.879', '03 8545 5584']\n", + "0 ['thomas', 'kositcin', '26-08-1939', 'male', 'melbourne', '43048.734', '07 4737 4471']\n", + "3 ['tomas', 'kosutcin', '26-08-1939', 'msle', 'melbourne', '43048.735', '07 4737 4471']\n", + "4 ['thornas', 'kos9tcin', '26-08-1939', 'male', 'melborune', '43948.734', '07 4737 4471']\n", "\n", - "1 ['beth', 'lette', '18-01-2000', 'female', 'sydney', '179719.049', '07 1868 6031']\n", - "2 ['beth', 'lette', '18-02-2000', 'femal4', 'stdq7ey', '179719.049', '07 1868 6931']\n", + "0 ['sofie', 'ny', '20-10-1933', 'fenale', '', '135685.300', '07 7905 6885']\n", + "4 ['stofia', 'ny', '20-10-q933', 'female', 'sydnev', '135685.300', '07 7905 6885']\n", "\n", - "3 ['tahlia', 'bishlp', '', 'female', 'sydney', '101203.290', '03 886u 1916']\n", - "2 ['ahlia', 'bishpp', '', 'female', 'syriey', '101204.290', '03 8867 1916']\n", + "1 ['sophie', 'mazx9ne', '25-03-2814', 'make', 'melbourne', '36878.525', '08 3679 2653']\n", + "2 ['sofie', 'mazzone', '25-03-2924', 'mals', 'melbourne', '36878.526', '08 3678 2653']\n", "\n", - "1 ['fzachary', 'mydlalc', '20-95-1916', 'male', 'sydney', '121209.129', '08 3807 4717']\n", - "3 ['zachary', 'mydlak', '20-05-1016', 'malr', 'sydhey', '121200.129', '08 3807 4627']\n", + "1 ['stephnaie', 'goldsworthy', '03-06-1958', '', 'canbrrra', '83372.67q', '02 4093 4044']\n", + "2 ['sttepbanie', 'goldsworthy', '03-06-1958', 'mald', 'canbedra', '83372.772', '02 4093 4044']\n", + "3 ['stefanie', 'goldsworthy', '03-06-1958', 'male', 'camberra', '83372.572', '']\n", + "4 ['stefanie', 'go|dsworthy', '03-06-1958', '', 'cabr:erra', '83372.672', '02 4093 4044']\n", "\n", - "0 ['jessica', 'white', '04-07-1979', 'male', 'perth', '385632.266', '04 8026 8748']\n", - "1 ['jezsica', 'whi5e', '05-07-1979', 'male', 'perth', '385632.276', '04 8026 8748']\n", + "3 ['antony', 'riean', '18-01-1908', 'male', 'canberra', '59633.334', '07 2734 8270']\n", + "4 ['anthnoy', 'ryari', '18-01-1908', 'male', 'cajberra', '58633.434', '07 2734 8370']\n", "\n", - "0 ['beriiamin', 'musoluno', '21-0y-1994', 'female', 'sydney', '81857.391', '08 8870 e498']\n", - "3 ['byenzakin', 'musoljno', '21-07-1995', 'female', 'sydney', '81857.392', '']\n", + "1 ['eiahn', 'greeti', '11-0e-1977', 'male', 'melbourne', '68538.966', '03 8798 1825']\n", + "3 ['eirn', 'kreen', '11-04-1977', 'male', 'meluourne', '68548.95y', '03 8798 1825']\n", "\n", - "3 ['ella', 'howie', '26-03-2003', 'male', 'melbourne', '97556.316', '03 3655 1171']\n", - "4 ['ela', 'howie', '26-03-2003', 'male', 'melboirne', '', '03 3555 1171']\n", - "0 ['lela', 'howie', '26-03-2903', 'male', 'melbourhe', '', '03 3655 1171']\n", + "0 ['aleesga', 'nkuyen', '14-06-1068', 'male', 'melbourrie', '122053.275', '02 6678 5223']\n", + "3 ['aleeSa', 'nguyen', '14-o6-1968', 'male', 'mtelbournr', '122053.265', '02 6678 5223']\n", "\n", - "1 ['livia', 'riaj', '13-03-1907', 'malw', 'melbovrne', '73305.107', '07 3846 2530']\n", - "2 ['livia', 'ryank', '13-03-1907', 'malw', 'melbuorne', '73305.107', '07 3946 2630']\n", - "3 ['ltvia', 'ryan', '13-03-1907', 'maoe', 'melbourne', '73305.197', '07 3046 2530']\n", - "4 ['livia', 'ryan', '13-03-1907', 'male', 'melbourne', '73305.107', '07 3946 2530']\n", + "0 ['benjamin', 'bishop', '25-11-1980', 'male', 'sydney', '95170.703', '04 3415 3977']\n", + "2 [\"benzam'ln\", 'bish9p', '25-11-1980', 'msle', 'sydn3v', '95170.703', '04 3415 3977']\n", + "1 ['bennie', 'bishop', '25-11-1980', 'mald', '', '95180.703', '04 3415 3977']\n", "\n", - "0 ['coby', 'ibshop', '', 'msle', 'sydney', '211655.118', '02 0833 7777']\n", - "2 ['coby', 'bishop', '15-08-1948', 'male', 'sydney', '211655.118', '02 9833 7777']\n", + "1 [\"ke'Irx\", 'chappel', '19-05-1966', 'male', '', '138869.396', '']\n", + "2 ['keira', 'chapepl', '19-05-1966', 'male', '', '148869.296', '']\n", "\n", - "1 ['emjkly', 'pareemore', '01-03-2977', 'female', 'rnelbourne', '1644487.925', '03 5761 5483']\n", - "2 ['emiily', 'parremore', '01-03-1977', 'female', 'melbourne', '1644487.925', '03 5761 5483']\n", + "0 ['deagxan', 'zaffino', '22-01-1979', 'femame', 'sydne7', '99746.221', '04 1534 02e5']\n", + "4 ['teagan', 'zaffino', '22-01-1979', 'female', 'sydney', '99746.221', '04 1534 0225']\n", "\n" ] } @@ -498,7 +481,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 10, "metadata": { "pycharm": {} }, @@ -538,7 +521,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.0" + "version": "3.7.3" }, "pycharm": { "stem_cell": { diff --git a/docs/tutorial/multiparty-linkage-with-clkhash.ipynb b/docs/tutorial/multiparty-linkage-with-clkhash.ipynb index a6a2ad3d..20ddd0ed 100644 --- a/docs/tutorial/multiparty-linkage-with-clkhash.ipynb +++ b/docs/tutorial/multiparty-linkage-with-clkhash.ipynb @@ -4,7 +4,9 @@ "cell_type": "code", "execution_count": 1, "metadata": { - "pycharm": {} + "pycharm": { + "is_executing": false + } }, "outputs": [], "source": [ @@ -18,11 +20,14 @@ { "cell_type": "code", "execution_count": 2, - "metadata": {}, + "metadata": { + "pycharm": { + "is_executing": false + } + }, "outputs": [], "source": [ - "KEY1 = 'correct'\n", - "KEY2 = 'horse'\n", + "SECRET = 'my_secret'\n", "\n", "SERVER = os.getenv(\"SERVER\", \"https://testing.es.data61.xyz\")" ] @@ -58,106 +63,38 @@ "cell_type": "code", "execution_count": 3, "metadata": { - "pycharm": {} + "pycharm": { + "is_executing": false + } }, "outputs": [ { "name": "stdout", - "output_type": "stream", "text": [ - "keys: correct, horse\n" - ] + "keys: my_secret\n" + ], + "output_type": "stream" } ], "source": [ - "print(f'keys: {KEY1}, {KEY2}')" + "print(f'keys: {SECRET}')" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { - "pycharm": {} + "pycharm": { + "is_executing": false + } }, "outputs": [ { "name": "stdout", - "output_type": "stream", "text": [ - "\n", - "{\n", - " \"version\": 2,\n", - " \"clkConfig\": {\n", - " \"l\": 1024,\n", - " \"kdf\": {\n", - " \"type\": \"HKDF\",\n", - " \"hash\": \"SHA256\",\n", - " \"salt\": \"SCbL2zHNnmsckfzchsNkZY9XoHk96P/G5nUBrM7ybymlEFsMV6PAeDZCNp3rfNUPCtLDMOGQHG4pCQpfhiHCyA==\",\n", - " \"info\": \"c2NoZW1hX2V4YW1wbGU=\",\n", - " \"keySize\": 64\n", - " }\n", - " },\n", - " \"features\": [\n", - " {\n", - " \"identifier\": \"id\",\n", - " \"ignored\": true\n", - " },\n", - " {\n", - " \"identifier\": \"givenname\",\n", - " \"format\": {\n", - " \"type\": \"string\",\n", - " \"encoding\": \"utf-8\"\n", - " },\n", - " \"hashing\": {\n", - " \"ngram\": 2,\n", - " \"positional\": false,\n", - " \"strategy\": {\"k\": 15}\n", - " }\n", - " },\n", - " {\n", - " \"identifier\": \"surname\",\n", - " \"format\": {\n", - " \"type\": \"string\",\n", - " \"encoding\": \"utf-8\"\n", - " },\n", - " \"hashing\": {\n", - " \"ngram\": 2,\n", - " \"positional\": false,\n", - " \"strategy\": {\"k\": 15}\n", - " }\n", - " },\n", - " {\n", - " \"identifier\": \"dob\",\n", - " \"format\": {\n", - " \"type\": \"string\",\n", - " \"encoding\": \"utf-8\"\n", - " },\n", - " \"hashing\": {\n", - " \"ngram\": 2,\n", - " \"positional\": true,\n", - " \"strategy\": {\"k\": 15}\n", - " }\n", - " },\n", - " {\n", - " \"identifier\": \"phone number\",\n", - " \"format\": {\n", - " \"type\": \"string\",\n", - " \"encoding\": \"utf-8\"\n", - " },\n", - " \"hashing\": {\n", - " \"ngram\": 1,\n", - " \"positional\": true,\n", - " \"strategy\": {\"k\": 8}\n", - " }\n", - " },\n", - " {\n", - " \"identifier\": \"ignoredForLinkage\",\n", - " \"ignored\": true\n", - " }\n", - " ]\n", - "}\n", - "\n" - ] + "{\n \"version\": 3,\n \"clkConfig\": {\n \"l\": 1024,\n \"kdf\": {\n \"type\": \"HKDF\",\n \"hash\": \"SHA256\",\n \"salt\": \"SCbL2zHNnmsckfzchsNkZY9XoHk96P/G5nUBrM7ybymlEFsMV6PAeDZCNp3rfNUPCtLDMOGQHG4pCQpfhiHCyA==\",\n \"info\": \"c2NoZW1hX2V4YW1wbGU=\",\n \"keySize\": 64\n }\n },\n \"features\": [\n {\n \"identifier\": \"id\",\n \"ignored\": true\n },\n {\n \"identifier\": \"givenname\",\n \"format\": {\n \"type\": \"string\",\n \"encoding\": \"utf-8\"\n },\n \"hashing\": {\n \"strategy\": {\n \"bitsPerToken\": 15\n },\n \"comparison\": {\n \"type\": \"ngram\",\n \"n\": 2,\n \"positional\": false\n }\n }\n },\n {\n \"identifier\": \"surname\",\n \"format\": {\n \"type\": \"string\",\n \"encoding\": \"utf-8\"\n },\n \"hashing\": {\n \"strategy\": {\n \"bitsPerToken\": 15\n },\n \"comparison\": {\n \"type\": \"ngram\",\n \"n\": 2,\n \"positional\": false\n }\n }\n },\n {\n \"identifier\": \"dob\",\n \"format\": {\n \"type\": \"string\",\n \"encoding\": \"utf-8\"\n },\n \"hashing\": {\n \"strategy\": {\n \"bitsPerToken\": 15\n },\n \"comparison\": {\n \"type\": \"ngram\",\n \"n\": 2,\n \"positional\": true\n }\n }\n },\n {\n \"identifier\": \"phone number\",\n \"format\": {\n \"type\": \"string\",\n \"encoding\": \"utf-8\"\n },\n \"hashing\": {\n \"strategy\": {\n \"bitsPerToken\": 8\n },\n \"comparison\": {\n \"type\": \"ngram\",\n \"n\": 1,\n \"positional\": true\n }\n }\n },\n {\n \"identifier\": \"ignoredForLinkage\",\n \"ignored\": true\n }\n ]\n}\n" + ], + "output_type": "stream" } ], "source": [ @@ -180,100 +117,19 @@ "cell_type": "code", "execution_count": 5, "metadata": { - "pycharm": {} + "pycharm": { + "is_executing": false + } }, "outputs": [ { "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idgivennamesurnamedobphone numbergender
00tarahilton27-08-194108 2210 0298male
13saJivernre22-12-297202 1090 1906mals
27sliverpaciorekNaNNaNmals
39rubygeorge09-05-193907 4698 6255male
410eyrinmcampbell29-1q-198308 299y 1535male
\n", - "
" - ], - "text/plain": [ - " id givenname surname dob phone number gender\n", - "0 0 tara hilton 27-08-1941 08 2210 0298 male\n", - "1 3 saJi vernre 22-12-2972 02 1090 1906 mals\n", - "2 7 sliver paciorek NaN NaN mals\n", - "3 9 ruby george 09-05-1939 07 4698 6255 male\n", - "4 10 eyrinm campbell 29-1q-1983 08 299y 1535 male" - ] + "text/plain": " id givenname surname dob phone number gender\n0 0 tara hilton 27-08-1941 08 2210 0298 male\n1 3 saJi vernre 22-12-2972 02 1090 1906 mals\n2 7 sliver paciorek NaN NaN mals\n3 9 ruby george 09-05-1939 07 4698 6255 male\n4 10 eyrinm campbell 29-1q-1983 08 299y 1535 male", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
idgivennamesurnamedobphone numbergender
00tarahilton27-08-194108 2210 0298male
13saJivernre22-12-297202 1090 1906mals
27sliverpaciorekNaNNaNmals
39rubygeorge09-05-193907 4698 6255male
410eyrinmcampbell29-1q-198308 299y 1535male
\n
" }, - "execution_count": 5, "metadata": {}, - "output_type": "execute_result" + "output_type": "execute_result", + "execution_count": 5 } ], "source": [ @@ -293,100 +149,19 @@ "cell_type": "code", "execution_count": 6, "metadata": { - "pycharm": {} + "pycharm": { + "is_executing": false + } }, "outputs": [ { "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idgivennamesurnamedobphone numbercity
03zaliverner22-12-197202 1090 1906perth
14samueltremellen21-12-192303 3605 9336melbourne
25amylodge16-01-195807 8286 9372canberra
37oIjipacioerk10-02-195904 4220 5949sydney
410erinkampgell29-12-198308 2996 1445perth
\n", - "
" - ], - "text/plain": [ - " id givenname surname dob phone number city\n", - "0 3 zali verner 22-12-1972 02 1090 1906 perth\n", - "1 4 samuel tremellen 21-12-1923 03 3605 9336 melbourne\n", - "2 5 amy lodge 16-01-1958 07 8286 9372 canberra\n", - "3 7 oIji pacioerk 10-02-1959 04 4220 5949 sydney\n", - "4 10 erin kampgell 29-12-1983 08 2996 1445 perth" - ] + "text/plain": " id givenname surname dob phone number city\n0 3 zali verner 22-12-1972 02 1090 1906 perth\n1 4 samuel tremellen 21-12-1923 03 3605 9336 melbourne\n2 5 amy lodge 16-01-1958 07 8286 9372 canberra\n3 7 oIji pacioerk 10-02-1959 04 4220 5949 sydney\n4 10 erin kampgell 29-12-1983 08 2996 1445 perth", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
idgivennamesurnamedobphone numbercity
03zaliverner22-12-197202 1090 1906perth
14samueltremellen21-12-192303 3605 9336melbourne
25amylodge16-01-195807 8286 9372canberra
37oIjipacioerk10-02-195904 4220 5949sydney
410erinkampgell29-12-198308 2996 1445perth
\n
" }, - "execution_count": 6, "metadata": {}, - "output_type": "execute_result" + "output_type": "execute_result", + "execution_count": 6 } ], "source": [ @@ -406,100 +181,19 @@ "cell_type": "code", "execution_count": 7, "metadata": { - "pycharm": {} + "pycharm": { + "is_executing": false + } }, "outputs": [ { "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idgivennamesurnamedobphone numberincome
01joshuaarkwright16-02-190304 8511 958070189.446
13zal:verner22-12-197202 1090 190650194.118
27oliyerpaciorwk10-02-195904 4210 594931750.993
38nacoyaranson17-08-192507 6033 4580102446.131
410erihcampbell29-12-1i8308 299t 1435331476.599
\n", - "
" - ], - "text/plain": [ - " id givenname surname dob phone number income\n", - "0 1 joshua arkwright 16-02-1903 04 8511 9580 70189.446\n", - "1 3 zal: verner 22-12-1972 02 1090 1906 50194.118\n", - "2 7 oliyer paciorwk 10-02-1959 04 4210 5949 31750.993\n", - "3 8 nacoya ranson 17-08-1925 07 6033 4580 102446.131\n", - "4 10 erih campbell 29-12-1i83 08 299t 1435 331476.599" - ] + "text/plain": " id givenname surname dob phone number income\n0 1 joshua arkwright 16-02-1903 04 8511 9580 70189.446\n1 3 zal: verner 22-12-1972 02 1090 1906 50194.118\n2 7 oliyer paciorwk 10-02-1959 04 4210 5949 31750.993\n3 8 nacoya ranson 17-08-1925 07 6033 4580 102446.131\n4 10 erih campbell 29-12-1i83 08 299t 1435 331476.599", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
idgivennamesurnamedobphone numberincome
01joshuaarkwright16-02-190304 8511 958070189.446
13zal:verner22-12-197202 1090 190650194.118
27oliyerpaciorwk10-02-195904 4210 594931750.993
38nacoyaranson17-08-192507 6033 4580102446.131
410erihcampbell29-12-1i8308 299t 1435331476.599
\n
" }, - "execution_count": 7, "metadata": {}, - "output_type": "execute_result" + "output_type": "execute_result", + "execution_count": 7 } ], "source": [ @@ -521,15 +215,17 @@ "cell_type": "code", "execution_count": 8, "metadata": { - "pycharm": {} + "pycharm": { + "is_executing": false + } }, "outputs": [ { - "name": "stderr", - "output_type": "stream", + "name": "stdout", "text": [ - "Project created\n" - ] + "\u001b[31mProject created\u001b[0m\r\n" + ], + "output_type": "stream" } ], "source": [ @@ -558,40 +254,38 @@ "cell_type": "code", "execution_count": 9, "metadata": { - "pycharm": {} + "pycharm": { + "is_executing": false + } }, "outputs": [ { - "name": "stderr", - "output_type": "stream", + "name": "stdout", "text": [ - "\n", - "generating CLKs: 0%| | 0.00/3.23k [00:00\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
gendercityincome
0peGh395273.665
1sydnev77367.636
2pertb323383.650
3syd1e7y79745.538
4perth28019.494
5canberra78961.675
6femalebrisnane
7malecanbetra
8sydme7106849.526
9melbourne68548.966
\n", - "" - ], - "text/plain": [ - " gender city income\n", - "0 peGh 395273.665\n", - "1 sydnev 77367.636\n", - "2 pertb 323383.650\n", - "3 syd1e7y 79745.538\n", - "4 perth 28019.494\n", - "5 canberra 78961.675\n", - "6 female brisnane \n", - "7 male canbetra \n", - "8 sydme7 106849.526\n", - "9 melbourne 68548.966" - ] + "text/plain": " gender city income\n0 male sydney \n1 male canbrrra \n2 femake sydn4v \n3 pertb 21407e.192\n4 femake sydriey \n5 mlebourne 56899.522\n6 male canberra \n7 female 44652.704\n8 male sydnely \n9 male 65381.450", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
gendercityincome
0malesydney
1malecanbrrra
2femakesydn4v
3pertb21407e.192
4femakesydriey
5mlebourne56899.522
6malecanberra
7female44652.704
8malesydnely
9male65381.450
\n
" }, - "execution_count": 19, "metadata": {}, - "output_type": "execute_result" + "output_type": "execute_result", + "execution_count": 19 } ], "source": [ @@ -951,33 +547,19 @@ "cell_type": "code", "execution_count": 20, "metadata": { - "pycharm": {}, + "pycharm": { + "is_executing": false + }, "scrolled": true }, "outputs": [ { "data": { - "text/plain": [ - "[[[0, 2111], [1, 2100]],\n", - " [[0, 2121], [2, 2131], [1, 2111]],\n", - " [[1, 1146], [2, 1202], [0, 1203]],\n", - " [[1, 2466], [2, 2478], [0, 2460]],\n", - " [[0, 429], [1, 412]],\n", - " [[0, 2669], [1, 1204]],\n", - " [[1, 1596], [2, 1623]],\n", - " [[0, 487], [1, 459]],\n", - " [[1, 1776], [2, 1800], [0, 1806]],\n", - " [[1, 2586], [2, 2602]],\n", - " [[0, 919], [1, 896]],\n", - " [[0, 100], [2, 107], [1, 100]],\n", - " [[0, 129], [1, 131], [2, 135]],\n", - " [[0, 470], [1, 440]],\n", - " [[0, 1736], [1, 1692], [2, 1734]]]" - ] + "text/plain": "[[[1, 2065], [0, 2428]],\n [[0, 1740], [1, 1693], [2, 1736]],\n [[1, 2224], [2, 2236]],\n [[0, 565], [1, 557], [2, 564]],\n [[0, 1980], [1, 1953]],\n [[0, 536], [2, 525], [1, 512]],\n [[1, 171], [2, 175], [0, 169]],\n [[0, 2234], [1, 2228], [2, 2242]],\n [[0, 918], [2, 2840]],\n [[0, 2461], [2, 2479], [1, 2468]],\n [[0, 2451], [2, 2471], [1, 2458]],\n [[0, 230], [1, 232]],\n [[0, 2765], [2, 2794], [1, 2789]],\n [[0, 1758], [2, 1754], [1, 1712]],\n [[1, 351], [2, 356]]]" }, - "execution_count": 20, "metadata": {}, - "output_type": "execute_result" + "output_type": "execute_result", + "execution_count": 20 } ], "source": [ @@ -999,7 +581,9 @@ "cell_type": "code", "execution_count": 21, "metadata": { - "pycharm": {} + "pycharm": { + "is_executing": false + } }, "outputs": [], "source": [ @@ -1023,200 +607,20 @@ "cell_type": "code", "execution_count": 22, "metadata": { - "pycharm": {} + "pycharm": { + "is_executing": false, + "name": "#%%\n" + } }, "outputs": [ { "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idgiven namesurnamedobphone numbernon-linking
64261171isabellebridgland30-03-199404 5318 6471mal4
64271171isalolIeriahgland30-02-199404 5318 6471sydnry
64281171isabellebridgland30-02-199404 5318 647163514.217
6429
64301243thmoasdoaldson13-04-190009 6963 1944male
64311243thoma5donaldson13-04-190008 6962 1944perth
64321243thomasdonalsdon13-04-290008 6963 2944489229.297
6433
64342207annahaslea02-11-290604 5501 5973male
64352207hannaheaslea02-11-200604 5501 5973canberra
6436
64375726rhysclarke19-05-192902 9220 9635mqle
64385726ry5clarke19-05-193902 9120 9635
64395726rhysklark19-05-293802 9220 9635118197.119
6440
\n", - "
" - ], - "text/plain": [ - " id given name surname dob phone number non-linking\n", - "6426 1171 isabelle bridgland 30-03-1994 04 5318 6471 mal4\n", - "6427 1171 isalolIe riahgland 30-02-1994 04 5318 6471 sydnry\n", - "6428 1171 isabelle bridgland 30-02-1994 04 5318 6471 63514.217\n", - "6429 \n", - "6430 1243 thmoas doaldson 13-04-1900 09 6963 1944 male\n", - "6431 1243 thoma5 donaldson 13-04-1900 08 6962 1944 perth\n", - "6432 1243 thomas donalsdon 13-04-2900 08 6963 2944 489229.297\n", - "6433 \n", - "6434 2207 annah aslea 02-11-2906 04 5501 5973 male\n", - "6435 2207 hannah easlea 02-11-2006 04 5501 5973 canberra\n", - "6436 \n", - "6437 5726 rhys clarke 19-05-1929 02 9220 9635 mqle\n", - "6438 5726 ry5 clarke 19-05-1939 02 9120 9635 \n", - "6439 5726 rhys klark 19-05-2938 02 9220 9635 118197.119\n", - "6440 " - ] + "text/plain": " id given name surname dob phone number non-linking\n6450 \n6451 1522 poahtia torpe 22-09-1999 07 6482 4546 femalr\n6452 1522 portia thorpe 22-09-1999 07 6482 4546 canberra\n6453 \n6454 8662 luct pulfort 05-03-1903 02 0726 9479 male\n6455 8662 lucy pulford 05-03-1903 melbourrie\n6456 8662 lusy pulford 05-03-1993 02 0726 0489 192230.309\n6457 \n6458 5797 chelsie pajc0ek 27-03-1961 07 3258 9992 male\n6459 5797 chel5i padci4 27-04-1961 07 3258 0991 sydney\n6460 5797 chelsie pasl\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
idgiven namesurnamedobphone numbernon-linking
6450
64511522poahtiatorpe22-09-199907 6482 4546femalr
64521522portiathorpe22-09-199907 6482 4546canberra
6453
64548662luctpulfort05-03-190302 0726 9479male
64558662lucypulford05-03-1903melbourrie
64568662lusypulford05-03-199302 0726 0489192230.309
6457
64585797chelsiepajc0ek27-03-196107 3258 9992male
64595797chel5ipadci427-04-196107 3258 0991sydney
64605797chelsiepasl<oe27-94-196107 3258 089262334.690
6461
64621885nicholasrobson06-01-191402 7799 6803canberra
64631885nicho|asrobson06-91-191402 7799 680361333.218
6464
\n" }, - "execution_count": 22, "metadata": {}, - "output_type": "execute_result" + "output_type": "execute_result", + "execution_count": 22 } ], "source": [ @@ -1226,7 +630,8 @@ " table.append([dataset_alice, dataset_bob, dataset_charlie][i][j])\n", " table.append([''] * 6)\n", " \n", - "pd.DataFrame(table, columns=['id', 'given name', 'surname', 'dob', 'phone number', 'non-linking']).tail(15)" + "pd.DataFrame(table, columns=['id', 'given name', 'surname', 'dob', 'phone number', 'non-linking']).tail(15)\n", + "\n" ] } ], @@ -1246,9 +651,18 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.0" + "version": "3.7.3" + }, + "pycharm": { + "stem_cell": { + "cell_type": "raw", + "source": [], + "metadata": { + "collapsed": false + } + } } }, "nbformat": 4, "nbformat_minor": 4 -} +} \ No newline at end of file diff --git a/docs/tutorial/tutorial-requirements.txt b/docs/tutorial/tutorial-requirements.txt index cb0725bb..5d2f8bf8 100644 --- a/docs/tutorial/tutorial-requirements.txt +++ b/docs/tutorial/tutorial-requirements.txt @@ -1,4 +1,4 @@ -clkhash>=0.13.0 +clkhash==0.15.0 ipython matplotlib recordlinkage