Skip to content
Prev Previous commit
Next Next commit
breaking up too long commands, delete project at the end
  • Loading branch information
wilko committed Nov 21, 2019
commit 2af440e4a772c071ad4519210ad2078fc095a6fa
109 changes: 64 additions & 45 deletions docs/tutorial/Permutations.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"{\"project_count\": 6539, \"rate\": 2530484, \"status\": \"ok\"}\r\n"
"{\"project_count\": 7050, \"rate\": 2824020, \"status\": \"ok\"}\r\n"
]
}
],
Expand Down Expand Up @@ -290,7 +290,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Overwriting /var/folders/mw/21b9jb5d1c9_3_z0dq7hpx1m00j_0b/T/tmp5qsl5x48\n"
"Overwriting /var/folders/mw/21b9jb5d1c9_3_z0dq7hpx1m00j_0b/T/tmp3jpcxxrs\n"
]
}
],
Expand Down Expand Up @@ -394,17 +394,17 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Credentials will be saved in /var/folders/mw/21b9jb5d1c9_3_z0dq7hpx1m00j_0b/T/tmpdo3x0629\n",
"Credentials will be saved in /var/folders/mw/21b9jb5d1c9_3_z0dq7hpx1m00j_0b/T/tmp_tz_feve\n",
"\u001b[31mProject created\u001b[0m\n"
]
},
{
"data": {
"text/plain": [
"{'project_id': 'fbb0845d2063e5cefe9153ebeacf42921418038a11c104ef',\n",
" 'result_token': '635bd95ab7c4d834bdf811aed0c026a81cd4944ba66ffc15',\n",
" 'update_tokens': ['ab2f33eef06d045db454d4fbc7821ea5971970beafede1be',\n",
" '13dc3ac340a2b51c78400a301fbaebc819022e5d231bb4a7']}"
"{'project_id': '7c942add9259b0c61fc06ce24afc6ee9c99355cc5a5eae7a',\n",
" 'result_token': '4552074bebabf66a19e707ef64aa35638fc1eb2cd3b9a768',\n",
" 'update_tokens': ['1045c9dda873d3cccf37181bcff7c61a5e82c6051d0da2c0',\n",
" 'fc27160c4e4736c1dbbecbedd6bc5e4117a3626c1f2eda9c']}"
]
},
"execution_count": 7,
Expand All @@ -416,7 +416,12 @@
"creds = NamedTemporaryFile('wt')\n",
"print(\"Credentials will be saved in\", creds.name)\n",
"\n",
"!clkutil create-project --schema \"{schema.name}\" --output \"{creds.name}\" --type \"permutations\" --server \"{url}\"\n",
"!clkutil create-project \\\n",
" --schema \"{schema.name}\" \\\n",
" --output \"{creds.name}\" \\\n",
" --type \"permutations\" \\\n",
" --server \"{url}\"\n",
"\n",
"creds.seek(0)\n",
"\n",
"import json\n",
Expand Down Expand Up @@ -458,8 +463,8 @@
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[31mCLK data written to /var/folders/mw/21b9jb5d1c9_3_z0dq7hpx1m00j_0b/T/tmpbd2u9qhd.json\u001b[0m\n",
"\u001b[31mCLK data written to /var/folders/mw/21b9jb5d1c9_3_z0dq7hpx1m00j_0b/T/tmp5al8agy7.json\u001b[0m\n"
"\u001b[31mCLK data written to /var/folders/mw/21b9jb5d1c9_3_z0dq7hpx1m00j_0b/T/tmppybfm62c.json\u001b[0m\n",
"\u001b[31mCLK data written to /var/folders/mw/21b9jb5d1c9_3_z0dq7hpx1m00j_0b/T/tmpu4jx4mjv.json\u001b[0m\n"
]
}
],
Expand Down Expand Up @@ -818,7 +823,7 @@
{
"data": {
"text/plain": [
"[1886, 1657, 2212, 4844, 3960, 125, 1791, 2770, 4888, 1367]"
"[3645, 1068, 4371, 465, 1533, 987, 343, 53, 3298, 2515]"
]
},
"execution_count": 20,
Expand Down Expand Up @@ -852,7 +857,7 @@
{
"data": {
"text/plain": [
"[4498, 3092, 520, 3834, 1074, 4519, 997, 991, 4505, 2389]"
"[3857, 4827, 3267, 4934, 1958, 3682, 4576, 4895, 4867, 1188]"
]
},
"execution_count": 21,
Expand Down Expand Up @@ -926,16 +931,16 @@
{
"data": {
"text/plain": [
"['rec-1225-org,hayden,ballantyne,13,,nunnook,young,2077,nsw,19330812,3414771\\n',\n",
" 'rec-4635-org,isabella,white,8,cooling place,,rosebud,6151,sa,19990911,2206317\\n',\n",
" 'rec-1790-org,bailey,heuer,65,fossey street,brindabella specialist centre,vaucluse,2010,qld,19511013,9539538\\n',\n",
" 'rec-2882-org,sarah,eglinton,19,beasley street,bandaroo,naracoorte,4021,nsw,19451107,4310446\\n',\n",
" 'rec-3521-org,spencer,bates-brownsword,151,pinkerton circuit,tora,smithfield,4860,nsw,19810308,5402648\\n',\n",
" 'rec-2055-org,tai,garven,21,finniss crescent,donette downs,pymble,2035,nsw,19930723,6253715\\n',\n",
" 'rec-1529-org,zachariah,campbell,32,gellibrand street,carowood,keswick,3148,vic,19271210,2544494\\n',\n",
" 'rec-1817-org,noah,boyle,11,dooland court,,flowerdale,3163,vic,19260331,2019310\\n',\n",
" 'rec-4200-org,lara,sekuless,82,loch street,,yarraville,3196,qld,19861129,1392776\\n',\n",
" 'rec-1541-org,jessica,paine,58,eddison place,pine hut,new farm,2022,vic,19661210,8315488\\n']"
"['rec-3302-org,blaize,koopman,17,allison place,aldersyde estate,balwyn north,4650,nsw,19110608,7823755\\n',\n",
" 'rec-1385-org,joel,bishop,10,french street,cedarview,orange,3223,nt,,1324854\\n',\n",
" 'rec-190-org,,alias,24,elkington street,pangani,isle of capri,2145,sa,19650429,8261472\\n',\n",
" 'rec-4781-org,jacob,waller,89,dalley crescent,the willows,mosman,2480,qld,19580408,6317326\\n',\n",
" 'rec-4881-org,alexandra,nguyen,44,colebatch place,langley flats,freshwater,3242,nsw,19511004,6416159\\n',\n",
" 'rec-4770-org,tegan,rosendale,1,sherbrooke street,nazareth village,innaloo,2250,wa,19801011,9351309\\n',\n",
" 'rec-3385-org,shanaye,carbone,41,haystack crescent,st vincents hospital,matong,3690,nsw,19300519,1632237\\n',\n",
" 'rec-3738-org,imogen,carlington,45,mcinnes street,parish talowahl,girilambone,2154,nsw,19781117,7912921\\n',\n",
" 'rec-831-org,laura,flannery,54,sid barnes crescent,weemilah,winston hills,5073,qld,19581023,9712180\\n',\n",
" 'rec-815-org,holly,campbell,21,casey crescent,nestor,westmead,4573,qld,19911007,4424335\\n']"
]
},
"execution_count": 24,
Expand All @@ -959,16 +964,16 @@
{
"data": {
"text/plain": [
"['rec-1225-dup-0,hayden,ballantyne,13,,,young,2077,nsw,19330812,3414771\\n',\n",
" 'rec-4635-dup-0,isaeblla,white,8,cooling place,massey green,rosebud,6151,sa,19990911,2206317\\n',\n",
" 'rec-1790-dup-0,shannon,heurr,65,fossey street,brindabella specialist centre,vaucluse,2010,qld,19511013,9539538\\n',\n",
" 'rec-2882-dup-0,sarah,eglinton,19,beasleyz street,,naraocorte,4012,nsw,19451107,4310446\\n',\n",
" 'rec-3521-dup-0,spencer,bates-brownsword,151,tora,pinkerton circuit,smithfield,4860,nsw,19810308,5402648\\n',\n",
" 'rec-2055-dup-0,taiz,garven,,finniss crescent,donetted owns,pymble,2035,nsw,19930723,6253715\\n',\n",
" 'rec-1529-dup-0,ebonie,campbell,32,gellibrand street,carowood,kessick,3148,vic,19271210,2544494\\n',\n",
" 'rec-1817-dup-0,noah,boyle,11,doolandcouhrt,,flowerdale,3163,vic,19260331,7756654\\n',\n",
" 'rec-4200-dup-0,lara,sekuless,9,loch sutreet,,yarraville,3196,qld,19861129,1392776\\n',\n",
" 'rec-1541-dup-0,jessica,paine,58,eddisonv place,pine hut,new farm,2022,vic,19661210,8315488\\n']"
"['rec-3302-dup-0,blaize,koopman,17,allison place,aldersydeestate,balwyn north,4650,nsw,19110608,7823755\\n',\n",
" 'rec-1385-dup-0,elton,bishop,10,french street,,orange,3223,nt,,1324854\\n',\n",
" 'rec-190-dup-0,,alias,24,elkington street,panganu,isle of capri,2145,sa,19650429,8261472\\n',\n",
" 'rec-4781-dup-0,jacob,waliler,89,dalley crescent,the ui llows,mosman,2487,qld,19580408,6317326\\n',\n",
" 'rec-4881-dup-0,nguyen,alexandra,44,colebatch place,langley flats,freshwater,3242,nsw,19511004,6416159\\n',\n",
" 'rec-4770-dup-0,tegan,rosendale,1,sherbrooke street,nazareth village,innaloo,2550,nsw,19801011,9351309\\n',\n",
" 'rec-3385-dup-0,shanaye,lonto,41,haystack crescent,,leetob,3680,nsw,19300519,1632237\\n',\n",
" 'rec-3738-dup-0,imogen,carlington,45,mcinnes treet,parish talowahl,girilabmone,2154,nsw,19781117,7912921\\n',\n",
" 'rec-831-dup-0,laura,flannery,54,sid barnes crescent,,winstonhills,5073,qld,19581023,9712180\\n',\n",
" 'rec-815-dup-0,holyl,campbell,21,casey crescent,,westmead,4573,qld,19911007,4424335\\n']"
]
},
"execution_count": 25,
Expand Down Expand Up @@ -1006,16 +1011,16 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Hayden Ballantyne (rec-1225-org) =? Hayden Ballantyne (rec-1225-dup-0)\n",
"Isabella White (rec-4635-org) =? Isaeblla White (rec-4635-dup-0)\n",
"Bailey Heuer (rec-1790-org) =? Shannon Heurr (rec-1790-dup-0)\n",
"Sarah Eglinton (rec-2882-org) =? Sarah Eglinton (rec-2882-dup-0)\n",
"Spencer Bates-Brownsword (rec-3521-org) =? Spencer Bates-Brownsword (rec-3521-dup-0)\n",
"Tai Garven (rec-2055-org) =? Taiz Garven (rec-2055-dup-0)\n",
"Zachariah Campbell (rec-1529-org) =? Ebonie Campbell (rec-1529-dup-0)\n",
"Noah Boyle (rec-1817-org) =? Noah Boyle (rec-1817-dup-0)\n",
"Lara Sekuless (rec-4200-org) =? Lara Sekuless (rec-4200-dup-0)\n",
"Jessica Paine (rec-1541-org) =? Jessica Paine (rec-1541-dup-0)\n"
"Blaize Koopman (rec-3302-org) =? Blaize Koopman (rec-3302-dup-0)\n",
"Joel Bishop (rec-1385-org) =? Elton Bishop (rec-1385-dup-0)\n",
" Alias (rec-190-org) =? Alias (rec-190-dup-0)\n",
"Jacob Waller (rec-4781-org) =? Jacob Waliler (rec-4781-dup-0)\n",
"Alexandra Nguyen (rec-4881-org) =? Nguyen Alexandra (rec-4881-dup-0)\n",
"Tegan Rosendale (rec-4770-org) =? Tegan Rosendale (rec-4770-dup-0)\n",
"Shanaye Carbone (rec-3385-org) =? Shanaye Lonto (rec-3385-dup-0)\n",
"Imogen Carlington (rec-3738-org) =? Imogen Carlington (rec-3738-dup-0)\n",
"Laura Flannery (rec-831-org) =? Laura Flannery (rec-831-dup-0)\n",
"Holly Campbell (rec-815-org) =? Holyl Campbell (rec-815-dup-0)\n"
]
}
],
Expand Down Expand Up @@ -1087,10 +1092,24 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 28,
"metadata": {},
"outputs": [],
"source": []
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[31mProject deleted\u001b[0m\r\n"
]
}
],
"source": [
"# Deleting the project\n",
"!clkutil delete-project \\\n",
" --project=\"{credentials['project_id']}\" \\\n",
" --apikey=\"{credentials['result_token']}\" \\\n",
" --server=\"{url}\""
]
}
],
"metadata": {
Expand Down
2 changes: 1 addition & 1 deletion docs/tutorial/Record Linkage API.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -694,7 +694,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
"version": "3.7.4"
},
"pycharm": {
"stem_cell": {
Expand Down
45 changes: 35 additions & 10 deletions docs/tutorial/Similarity Scores.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"{\"project_count\": 6542, \"rate\": 2549867, \"status\": \"ok\"}\r\n"
"{\"project_count\": 7082, \"rate\": 2845548, \"status\": \"ok\"}\r\n"
]
}
],
Expand Down Expand Up @@ -313,7 +313,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Overwriting /var/folders/mw/21b9jb5d1c9_3_z0dq7hpx1m00j_0b/T/tmpf_28_avn\n"
"Overwriting /var/folders/mw/21b9jb5d1c9_3_z0dq7hpx1m00j_0b/T/tmpgayhu51z\n"
]
}
],
Expand Down Expand Up @@ -555,17 +555,17 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Credentials will be saved in /var/folders/mw/21b9jb5d1c9_3_z0dq7hpx1m00j_0b/T/tmpd6lzqk30\n",
"Credentials will be saved in /var/folders/mw/21b9jb5d1c9_3_z0dq7hpx1m00j_0b/T/tmp6fab0949\n",
"\u001b[31mProject created\u001b[0m\n"
]
},
{
"data": {
"text/plain": [
"{'project_id': 'a28958f3c6df6afa3cdbe0337a2621f1a76ce4c6929fb772',\n",
" 'result_token': 'dde8c58598ea98de862ae5c4e48ec3acfe342162a4133afd',\n",
" 'update_tokens': ['1535fe32ca6becf8fe91b0de32d2e47d9e3edddb72017205',\n",
" 'c50cb50080d0345fb8407ad9a974323567a054884ab2f4d1']}"
"{'project_id': '224ac23b0c6ba6c661ade4082d0741fc94b9af3ebf09f9fd',\n",
" 'result_token': '331d33ba45f4b636aac944ba1ba52f2602a3f18bcff9ff25',\n",
" 'update_tokens': ['8826451e71fb0ea6f1b07d7e54264ab7477f4d97898f8ce5',\n",
" '122076e7ef37ccbedda55d028210d4e5a14d9441329ea492']}"
]
},
"execution_count": 8,
Expand All @@ -577,7 +577,12 @@
"creds = NamedTemporaryFile('wt')\n",
"print(\"Credentials will be saved in\", creds.name)\n",
"\n",
"!clkutil create-project --schema \"{schema.name}\" --output \"{creds.name}\" --type \"similarity_scores\" --server \"{url}\"\n",
"!clkutil create-project \\\n",
" --schema \"{schema.name}\" \\\n",
" --output \"{creds.name}\" \\\n",
" --type \"similarity_scores\" \\\n",
" --server \"{url}\"\n",
"\n",
"creds.seek(0)\n",
"\n",
"with open(creds.name, 'r') as f:\n",
Expand Down Expand Up @@ -613,8 +618,8 @@
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[31mCLK data written to /var/folders/mw/21b9jb5d1c9_3_z0dq7hpx1m00j_0b/T/tmp3hr0gbdc.json\u001b[0m\n",
"\u001b[31mCLK data written to /var/folders/mw/21b9jb5d1c9_3_z0dq7hpx1m00j_0b/T/tmp8ba6c8bt.json\u001b[0m\n"
"\u001b[31mCLK data written to /var/folders/mw/21b9jb5d1c9_3_z0dq7hpx1m00j_0b/T/tmp2s_mj16v.json\u001b[0m\n",
"\u001b[31mCLK data written to /var/folders/mw/21b9jb5d1c9_3_z0dq7hpx1m00j_0b/T/tmpvlxznb1y.json\u001b[0m\n"
]
}
],
Expand Down Expand Up @@ -1017,6 +1022,26 @@
"When choosing a similarity threshold for solving, the valley between these two distributions is a good starting point. In this example, it is around 0.82. We can see that almost all similarity scores above 0.82 are from matches, so a solver using this threshold will produce a linkage result with high precision. However, recall will not be optimal, as there are still some scores from matches below 0.82. Raising the threshold favours precision, while lowering it favours recall."
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[31mProject deleted\u001b[0m\r\n"
]
}
],
"source": [
"# Deleting the project\n",
"!clkutil delete-project \\\n",
" --project=\"{credentials['project_id']}\" \\\n",
" --apikey=\"{credentials['result_token']}\" \\\n",
" --server=\"{url}\""
]
},
{
"cell_type": "code",
"execution_count": null,
Expand Down
2 changes: 1 addition & 1 deletion docs/tutorial/multiparty-linkage-in-entity-service.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -521,7 +521,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
"version": "3.7.4"
},
"pycharm": {
"stem_cell": {
Expand Down
Loading