|
2 | 2 | "cells": [ |
3 | 3 | { |
4 | 4 | "cell_type": "code", |
5 | | - "execution_count": 70, |
| 5 | + "execution_count": 1, |
6 | 6 | "metadata": { |
7 | 7 | "collapsed": false |
8 | 8 | }, |
|
13 | 13 | "'%.3f'" |
14 | 14 | ] |
15 | 15 | }, |
16 | | - "execution_count": 70, |
| 16 | + "execution_count": 1, |
17 | 17 | "metadata": {}, |
18 | 18 | "output_type": "execute_result" |
19 | 19 | } |
|
39 | 39 | }, |
40 | 40 | { |
41 | 41 | "cell_type": "code", |
42 | | - "execution_count": 71, |
| 42 | + "execution_count": 2, |
43 | 43 | "metadata": { |
44 | 44 | "collapsed": true |
45 | 45 | }, |
|
152 | 152 | }, |
153 | 153 | { |
154 | 154 | "cell_type": "code", |
155 | | - "execution_count": 72, |
| 155 | + "execution_count": 3, |
156 | 156 | "metadata": { |
157 | 157 | "collapsed": false |
158 | 158 | }, |
|
164 | 164 | "<IPython.core.display.Image object>" |
165 | 165 | ] |
166 | 166 | }, |
167 | | - "execution_count": 72, |
| 167 | + "execution_count": 3, |
168 | 168 | "metadata": {}, |
169 | 169 | "output_type": "execute_result" |
170 | 170 | } |
|
192 | 192 | "cell_type": "markdown", |
193 | 193 | "metadata": {}, |
194 | 194 | "source": [ |
195 | | - "#### Information for Parent" |
| 195 | + "## Gini for Parent" |
196 | 196 | ] |
197 | 197 | }, |
198 | 198 | { |
199 | 199 | "cell_type": "code", |
200 | | - "execution_count": 73, |
| 200 | + "execution_count": 4, |
201 | 201 | "metadata": { |
202 | 202 | "collapsed": false |
203 | 203 | }, |
|
208 | 208 | "0.665" |
209 | 209 | ] |
210 | 210 | }, |
211 | | - "execution_count": 73, |
| 211 | + "execution_count": 4, |
212 | 212 | "metadata": {}, |
213 | 213 | "output_type": "execute_result" |
214 | 214 | } |
|
221 | 221 | "cell_type": "markdown", |
222 | 222 | "metadata": {}, |
223 | 223 | "source": [ |
224 | | - "#### Information for Child Node (left)" |
| 224 | + "## Gini for Child Node (left)" |
225 | 225 | ] |
226 | 226 | }, |
227 | 227 | { |
228 | 228 | "cell_type": "code", |
229 | | - "execution_count": 74, |
| 229 | + "execution_count": 5, |
230 | 230 | "metadata": { |
231 | 231 | "collapsed": false |
232 | 232 | }, |
|
237 | 237 | "0.000" |
238 | 238 | ] |
239 | 239 | }, |
240 | | - "execution_count": 74, |
| 240 | + "execution_count": 5, |
241 | 241 | "metadata": {}, |
242 | 242 | "output_type": "execute_result" |
243 | 243 | } |
|
250 | 250 | "cell_type": "markdown", |
251 | 251 | "metadata": {}, |
252 | 252 | "source": [ |
253 | | - "#### Information for Child Node (right)" |
| 253 | + "## Gini for Child Node (right)" |
254 | 254 | ] |
255 | 255 | }, |
256 | 256 | { |
257 | 257 | "cell_type": "code", |
258 | | - "execution_count": 75, |
| 258 | + "execution_count": 6, |
259 | 259 | "metadata": { |
260 | 260 | "collapsed": false |
261 | 261 | }, |
|
266 | 266 | "0.497" |
267 | 267 | ] |
268 | 268 | }, |
269 | | - "execution_count": 75, |
| 269 | + "execution_count": 6, |
270 | 270 | "metadata": {}, |
271 | 271 | "output_type": "execute_result" |
272 | 272 | } |
|
279 | 279 | "cell_type": "markdown", |
280 | 280 | "metadata": {}, |
281 | 281 | "source": [ |
282 | | - "#### Information Gain (Gini Criterion) " |
| 282 | + "## Information Gain (Gini)" |
283 | 283 | ] |
284 | 284 | }, |
285 | 285 | { |
286 | 286 | "cell_type": "code", |
287 | | - "execution_count": 76, |
| 287 | + "execution_count": 7, |
288 | 288 | "metadata": { |
289 | 289 | "collapsed": false |
290 | 290 | }, |
|
295 | 295 | "0.168" |
296 | 296 | ] |
297 | 297 | }, |
298 | | - "execution_count": 76, |
| 298 | + "execution_count": 7, |
299 | 299 | "metadata": {}, |
300 | 300 | "output_type": "execute_result" |
301 | 301 | } |
|
320 | 320 | }, |
321 | 321 | { |
322 | 322 | "cell_type": "code", |
323 | | - "execution_count": 77, |
| 323 | + "execution_count": 8, |
324 | 324 | "metadata": { |
325 | 325 | "collapsed": false |
326 | 326 | }, |
|
332 | 332 | "<IPython.core.display.Image object>" |
333 | 333 | ] |
334 | 334 | }, |
335 | | - "execution_count": 77, |
| 335 | + "execution_count": 8, |
336 | 336 | "metadata": {}, |
337 | 337 | "output_type": "execute_result" |
338 | 338 | } |
|
360 | 360 | "cell_type": "markdown", |
361 | 361 | "metadata": {}, |
362 | 362 | "source": [ |
363 | | - "#### Information for Parent" |
| 363 | + "## Entropy for Parent" |
364 | 364 | ] |
365 | 365 | }, |
366 | 366 | { |
367 | 367 | "cell_type": "code", |
368 | | - "execution_count": 78, |
| 368 | + "execution_count": 18, |
369 | 369 | "metadata": { |
370 | 370 | "collapsed": false |
371 | 371 | }, |
|
376 | 376 | "1.581711119299905" |
377 | 377 | ] |
378 | 378 | }, |
379 | | - "execution_count": 78, |
| 379 | + "execution_count": 18, |
380 | 380 | "metadata": {}, |
381 | 381 | "output_type": "execute_result" |
382 | 382 | } |
383 | 383 | ], |
384 | 384 | "source": [ |
385 | | - "-1*( ((38.0 / 112)* np.log2(38.0/112)) + ((40.0 / 112)* np.log2(40.0/112)) + ((34.0 / 112)* np.log2(34.0/112)) )" |
| 385 | + "-1*( ((38.0/112)*np.log2(38.0/112))+((40.0/112)*np.log2(40.0/112))\\\n", |
| 386 | + " +((34.0/112)*np.log2(34.0/112)) )" |
386 | 387 | ] |
387 | 388 | }, |
388 | 389 | { |
389 | 390 | "cell_type": "markdown", |
390 | 391 | "metadata": {}, |
391 | 392 | "source": [ |
392 | | - "#### Information for Child Node (left)" |
| 393 | + "## Entropy for Child Node (left)" |
393 | 394 | ] |
394 | 395 | }, |
395 | 396 | { |
396 | 397 | "cell_type": "code", |
397 | | - "execution_count": 79, |
| 398 | + "execution_count": 19, |
398 | 399 | "metadata": { |
399 | 400 | "collapsed": false |
400 | 401 | }, |
|
405 | 406 | "-0.0" |
406 | 407 | ] |
407 | 408 | }, |
408 | | - "execution_count": 79, |
| 409 | + "execution_count": 19, |
409 | 410 | "metadata": {}, |
410 | 411 | "output_type": "execute_result" |
411 | 412 | } |
412 | 413 | ], |
413 | 414 | "source": [ |
414 | | - "-1*(((38.0 / 38.0)* np.log2(38.0/38.0)))" |
| 415 | + "-1*( ((38.0/38)* np.log2(38.0/38)) )" |
415 | 416 | ] |
416 | 417 | }, |
417 | 418 | { |
418 | 419 | "cell_type": "markdown", |
419 | 420 | "metadata": {}, |
420 | 421 | "source": [ |
421 | | - "#### Information for Child Node (right)" |
| 422 | + "## Entropy for Child Node (right)" |
422 | 423 | ] |
423 | 424 | }, |
424 | 425 | { |
425 | 426 | "cell_type": "code", |
426 | | - "execution_count": 80, |
| 427 | + "execution_count": 22, |
427 | 428 | "metadata": { |
428 | 429 | "collapsed": false |
429 | 430 | }, |
|
434 | 435 | "0.9952525494396791" |
435 | 436 | ] |
436 | 437 | }, |
437 | | - "execution_count": 80, |
| 438 | + "execution_count": 22, |
438 | 439 | "metadata": {}, |
439 | 440 | "output_type": "execute_result" |
440 | 441 | } |
441 | 442 | ], |
442 | 443 | "source": [ |
443 | | - "-1*( ((40.0 / 74.0)* np.log2(40.0/74.0)) + ((34.0 / 74.0)* np.log2(34.0/74.0)) )" |
| 444 | + "-1*( ((40.0/74)* np.log2(40.0/74)) + ((34.0/74)* np.log2(34.0/74)) )" |
444 | 445 | ] |
445 | 446 | }, |
446 | 447 | { |
447 | 448 | "cell_type": "markdown", |
448 | 449 | "metadata": {}, |
449 | 450 | "source": [ |
450 | | - "#### Information Gain" |
| 451 | + "## Information Gain (Entropy)" |
451 | 452 | ] |
452 | 453 | }, |
453 | 454 | { |
454 | 455 | "cell_type": "code", |
455 | | - "execution_count": 81, |
| 456 | + "execution_count": 23, |
456 | 457 | "metadata": { |
457 | 458 | "collapsed": false |
458 | 459 | }, |
|
463 | 464 | "0.587" |
464 | 465 | ] |
465 | 466 | }, |
466 | | - "execution_count": 81, |
| 467 | + "execution_count": 23, |
467 | 468 | "metadata": {}, |
468 | 469 | "output_type": "execute_result" |
469 | 470 | } |
|
476 | 477 | "cell_type": "markdown", |
477 | 478 | "metadata": {}, |
478 | 479 | "source": [ |
479 | | - "# Bad Split (for blog purposes)\n", |
| 480 | + "# Bad Split (for blog purposes; ignore this section, as it uses Gini rather than entropy)\n", |
480 | 481 | "petal length <= 3.6 " |
481 | 482 | ] |
482 | 483 | }, |
|
0 commit comments