Skip to content

Commit a7ef35d

Browse files
committed
Updated gini calculation
1 parent ca63c10 commit a7ef35d

File tree

9 files changed

+281
-137
lines changed

9 files changed

+281
-137
lines changed

.DS_Store

0 Bytes
Binary file not shown.

Sklearn/.DS_Store

0 Bytes
Binary file not shown.

Sklearn/DecisionTrees/.DS_Store

0 Bytes
Binary file not shown.

Sklearn/DecisionTrees/.ipynb_checkpoints/EntropyGiniCalculations-checkpoint.ipynb

Lines changed: 245 additions & 102 deletions
Large diffs are not rendered by default.

Sklearn/DecisionTrees/EntropyGiniCalculations.ipynb

Lines changed: 36 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
"cells": [
33
{
44
"cell_type": "code",
5-
"execution_count": 70,
5+
"execution_count": 1,
66
"metadata": {
77
"collapsed": false
88
},
@@ -13,7 +13,7 @@
1313
"'%.3f'"
1414
]
1515
},
16-
"execution_count": 70,
16+
"execution_count": 1,
1717
"metadata": {},
1818
"output_type": "execute_result"
1919
}
@@ -39,7 +39,7 @@
3939
},
4040
{
4141
"cell_type": "code",
42-
"execution_count": 71,
42+
"execution_count": 2,
4343
"metadata": {
4444
"collapsed": true
4545
},
@@ -152,7 +152,7 @@
152152
},
153153
{
154154
"cell_type": "code",
155-
"execution_count": 72,
155+
"execution_count": 3,
156156
"metadata": {
157157
"collapsed": false
158158
},
@@ -164,7 +164,7 @@
164164
"<IPython.core.display.Image object>"
165165
]
166166
},
167-
"execution_count": 72,
167+
"execution_count": 3,
168168
"metadata": {},
169169
"output_type": "execute_result"
170170
}
@@ -192,12 +192,12 @@
192192
"cell_type": "markdown",
193193
"metadata": {},
194194
"source": [
195-
"#### Information for Parent"
195+
"## Gini for Parent"
196196
]
197197
},
198198
{
199199
"cell_type": "code",
200-
"execution_count": 73,
200+
"execution_count": 4,
201201
"metadata": {
202202
"collapsed": false
203203
},
@@ -208,7 +208,7 @@
208208
"0.665"
209209
]
210210
},
211-
"execution_count": 73,
211+
"execution_count": 4,
212212
"metadata": {},
213213
"output_type": "execute_result"
214214
}
@@ -221,12 +221,12 @@
221221
"cell_type": "markdown",
222222
"metadata": {},
223223
"source": [
224-
"#### Information for Child Node (left)"
224+
"## Gini for Child Node (left)"
225225
]
226226
},
227227
{
228228
"cell_type": "code",
229-
"execution_count": 74,
229+
"execution_count": 5,
230230
"metadata": {
231231
"collapsed": false
232232
},
@@ -237,7 +237,7 @@
237237
"0.000"
238238
]
239239
},
240-
"execution_count": 74,
240+
"execution_count": 5,
241241
"metadata": {},
242242
"output_type": "execute_result"
243243
}
@@ -250,12 +250,12 @@
250250
"cell_type": "markdown",
251251
"metadata": {},
252252
"source": [
253-
"#### Information for Child Node (right)"
253+
"## Gini for Child Node (right)"
254254
]
255255
},
256256
{
257257
"cell_type": "code",
258-
"execution_count": 75,
258+
"execution_count": 6,
259259
"metadata": {
260260
"collapsed": false
261261
},
@@ -266,7 +266,7 @@
266266
"0.497"
267267
]
268268
},
269-
"execution_count": 75,
269+
"execution_count": 6,
270270
"metadata": {},
271271
"output_type": "execute_result"
272272
}
@@ -279,12 +279,12 @@
279279
"cell_type": "markdown",
280280
"metadata": {},
281281
"source": [
282-
"#### Information Gain (Gini Criterion) "
282+
"## Information Gain"
283283
]
284284
},
285285
{
286286
"cell_type": "code",
287-
"execution_count": 76,
287+
"execution_count": 7,
288288
"metadata": {
289289
"collapsed": false
290290
},
@@ -295,7 +295,7 @@
295295
"0.168"
296296
]
297297
},
298-
"execution_count": 76,
298+
"execution_count": 7,
299299
"metadata": {},
300300
"output_type": "execute_result"
301301
}
@@ -320,7 +320,7 @@
320320
},
321321
{
322322
"cell_type": "code",
323-
"execution_count": 77,
323+
"execution_count": 8,
324324
"metadata": {
325325
"collapsed": false
326326
},
@@ -332,7 +332,7 @@
332332
"<IPython.core.display.Image object>"
333333
]
334334
},
335-
"execution_count": 77,
335+
"execution_count": 8,
336336
"metadata": {},
337337
"output_type": "execute_result"
338338
}
@@ -360,12 +360,12 @@
360360
"cell_type": "markdown",
361361
"metadata": {},
362362
"source": [
363-
"#### Information for Parent"
363+
"## Entropy for Parent"
364364
]
365365
},
366366
{
367367
"cell_type": "code",
368-
"execution_count": 78,
368+
"execution_count": 18,
369369
"metadata": {
370370
"collapsed": false
371371
},
@@ -376,25 +376,26 @@
376376
"1.581711119299905"
377377
]
378378
},
379-
"execution_count": 78,
379+
"execution_count": 18,
380380
"metadata": {},
381381
"output_type": "execute_result"
382382
}
383383
],
384384
"source": [
385-
"-1*( ((38.0 / 112)* np.log2(38.0/112)) + ((40.0 / 112)* np.log2(40.0/112)) + ((34.0 / 112)* np.log2(34.0/112)) )"
385+
"-1*( ((38.0/112)*np.log2(38.0/112))+((40.0/112)*np.log2(40.0/112))\\\n",
386+
" +((34.0/112)*np.log2(34.0/112)) )"
386387
]
387388
},
388389
{
389390
"cell_type": "markdown",
390391
"metadata": {},
391392
"source": [
392-
"#### Information for Child Node (left)"
393+
"## Entropy for Child Node (left)"
393394
]
394395
},
395396
{
396397
"cell_type": "code",
397-
"execution_count": 79,
398+
"execution_count": 19,
398399
"metadata": {
399400
"collapsed": false
400401
},
@@ -405,25 +406,25 @@
405406
"-0.0"
406407
]
407408
},
408-
"execution_count": 79,
409+
"execution_count": 19,
409410
"metadata": {},
410411
"output_type": "execute_result"
411412
}
412413
],
413414
"source": [
414-
"-1*(((38.0 / 38.0)* np.log2(38.0/38.0)))"
415+
"-1*( ((38.0/38)* np.log2(38.0/38)) )"
415416
]
416417
},
417418
{
418419
"cell_type": "markdown",
419420
"metadata": {},
420421
"source": [
421-
"#### Information for Child Node (right)"
422+
"## Entropy for Child Node (right)"
422423
]
423424
},
424425
{
425426
"cell_type": "code",
426-
"execution_count": 80,
427+
"execution_count": 22,
427428
"metadata": {
428429
"collapsed": false
429430
},
@@ -434,25 +435,25 @@
434435
"0.9952525494396791"
435436
]
436437
},
437-
"execution_count": 80,
438+
"execution_count": 22,
438439
"metadata": {},
439440
"output_type": "execute_result"
440441
}
441442
],
442443
"source": [
443-
"-1*( ((40.0 / 74.0)* np.log2(40.0/74.0)) + ((34.0 / 74.0)* np.log2(34.0/74.0)) )"
444+
"-1*( ((40.0/74)* np.log2(40.0/74)) + ((34.0/74)* np.log2(34.0/74)) )"
444445
]
445446
},
446447
{
447448
"cell_type": "markdown",
448449
"metadata": {},
449450
"source": [
450-
"#### Information Gain"
451+
"## Information Gain"
451452
]
452453
},
453454
{
454455
"cell_type": "code",
455-
"execution_count": 81,
456+
"execution_count": 23,
456457
"metadata": {
457458
"collapsed": false
458459
},
@@ -463,7 +464,7 @@
463464
"0.587"
464465
]
465466
},
466-
"execution_count": 81,
467+
"execution_count": 23,
467468
"metadata": {},
468469
"output_type": "execute_result"
469470
}
@@ -476,7 +477,7 @@
476477
"cell_type": "markdown",
477478
"metadata": {},
478479
"source": [
479-
"# Bad Split (for blog purposes)\n",
480+
"# Bad Split (for blog purposes; ignore this section, as it uses Gini rather than entropy)\n",
480481
"petal length <= 3.6 "
481482
]
482483
},
122 KB
Binary file not shown.
349 KB
Loading
425 KB
Loading

Statistics/.DS_Store

0 Bytes
Binary file not shown.

0 commit comments

Comments
 (0)