Skip to content

Commit 2376586

Browse files
committed
Updated decision tree calc
1 parent e7e41cf commit 2376586

File tree

11 files changed

+121
-58
lines changed

11 files changed

+121
-58
lines changed

.DS_Store

0 Bytes
Binary file not shown.

Sklearn/.DS_Store

0 Bytes
Binary file not shown.

Sklearn/DecisionTrees/.DS_Store

0 Bytes
Binary file not shown.

Sklearn/DecisionTrees/.ipynb_checkpoints/EntropyGiniCalculations-checkpoint.ipynb

Lines changed: 65 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,22 @@
22
"cells": [
33
{
44
"cell_type": "code",
5-
"execution_count": 1,
5+
"execution_count": 18,
66
"metadata": {
7-
"collapsed": true
7+
"collapsed": false
88
},
9-
"outputs": [],
9+
"outputs": [
10+
{
11+
"data": {
12+
"text/plain": [
13+
"'%.3f'"
14+
]
15+
},
16+
"execution_count": 18,
17+
"metadata": {},
18+
"output_type": "execute_result"
19+
}
20+
],
1021
"source": [
1122
"%matplotlib inline\n",
1223
"\n",
@@ -19,12 +30,16 @@
1930
"\n",
2031
"from sklearn.model_selection import train_test_split\n",
2132
"from sklearn import tree\n",
22-
"from IPython.display import Image"
33+
"from IPython.display import Image\n",
34+
"\n",
35+
"#np.set_printoptions(precision=3)\n",
36+
"\n",
37+
"%precision 3"
2338
]
2439
},
2540
{
2641
"cell_type": "code",
27-
"execution_count": 2,
42+
"execution_count": 19,
2843
"metadata": {
2944
"collapsed": true
3045
},
@@ -73,7 +88,14 @@
7388
"cell_type": "markdown",
7489
"metadata": {},
7590
"source": [
76-
"$$IG(D_P,f)=I(D_p) - \\sum_{j=1}^{m}\\frac{N_j}{N}I(D_j)$$"
91+
"$$\\huge D_p$$"
92+
]
93+
},
94+
{
95+
"cell_type": "markdown",
96+
"metadata": {},
97+
"source": [
98+
"$$\\huge IG(D_p,f)=I(D_p) - \\sum_{j=1}^{m}\\frac{N_j}{N}I(D_j)$$"
7799
]
78100
},
79101
{
@@ -94,7 +116,7 @@
94116
"cell_type": "markdown",
95117
"metadata": {},
96118
"source": [
97-
"$$IG(D_P,\\alpha)=I(D_p) - \\frac{N_{left}}{N}I(D_{left})- \\frac{N_{right}}{N}I(D_{right})$$"
119+
"$$\\huge IG(D_p,f)=I(D_p) - \\frac{N_{left}}{N}I(D_{left})- \\frac{N_{right}}{N}I(D_{right})$$"
98120
]
99121
},
100122
{
@@ -130,7 +152,7 @@
130152
},
131153
{
132154
"cell_type": "code",
133-
"execution_count": 3,
155+
"execution_count": 20,
134156
"metadata": {
135157
"collapsed": false
136158
},
@@ -142,7 +164,7 @@
142164
"<IPython.core.display.Image object>"
143165
]
144166
},
145-
"execution_count": 3,
167+
"execution_count": 20,
146168
"metadata": {},
147169
"output_type": "execute_result"
148170
}
@@ -170,23 +192,23 @@
170192
"cell_type": "markdown",
171193
"metadata": {},
172194
"source": [
173-
"#### Information for Parent Dataset"
195+
"#### Information for Parent"
174196
]
175197
},
176198
{
177199
"cell_type": "code",
178-
"execution_count": 13,
200+
"execution_count": 21,
179201
"metadata": {
180202
"collapsed": false
181203
},
182204
"outputs": [
183205
{
184206
"data": {
185207
"text/plain": [
186-
"0.6651785714285714"
208+
"0.665"
187209
]
188210
},
189-
"execution_count": 13,
211+
"execution_count": 21,
190212
"metadata": {},
191213
"output_type": "execute_result"
192214
}
@@ -202,27 +224,20 @@
202224
"#### Information for Child Node (left)"
203225
]
204226
},
205-
{
206-
"cell_type": "markdown",
207-
"metadata": {},
208-
"source": [
209-
"Pure node so no calculation needed"
210-
]
211-
},
212227
{
213228
"cell_type": "code",
214-
"execution_count": 15,
229+
"execution_count": 22,
215230
"metadata": {
216231
"collapsed": false
217232
},
218233
"outputs": [
219234
{
220235
"data": {
221236
"text/plain": [
222-
"0.0"
237+
"0.000"
223238
]
224239
},
225-
"execution_count": 15,
240+
"execution_count": 22,
226241
"metadata": {},
227242
"output_type": "execute_result"
228243
}
@@ -240,18 +255,18 @@
240255
},
241256
{
242257
"cell_type": "code",
243-
"execution_count": 16,
258+
"execution_count": 23,
244259
"metadata": {
245260
"collapsed": false
246261
},
247262
"outputs": [
248263
{
249264
"data": {
250265
"text/plain": [
251-
"0.4967129291453616"
266+
"0.497"
252267
]
253268
},
254-
"execution_count": 16,
269+
"execution_count": 23,
255270
"metadata": {},
256271
"output_type": "execute_result"
257272
}
@@ -298,6 +313,26 @@
298313
"Image(filename = PATH[0] + \"/Graphviz_Dot_Examples/iris_depth1_entropy_decisionTree.png\")"
299314
]
300315
},
316+
{
317+
"cell_type": "code",
318+
"execution_count": null,
319+
"metadata": {
320+
"collapsed": true
321+
},
322+
"outputs": [],
323+
"source": [
324+
" np.set_printoptions(precision=3)"
325+
]
326+
},
327+
{
328+
"cell_type": "code",
329+
"execution_count": null,
330+
"metadata": {
331+
"collapsed": true
332+
},
333+
"outputs": [],
334+
"source": []
335+
},
301336
{
302337
"cell_type": "markdown",
303338
"metadata": {},
@@ -316,7 +351,7 @@
316351
"cell_type": "markdown",
317352
"metadata": {},
318353
"source": [
319-
"#### Information for Parent Dataset"
354+
"#### Information for Parent"
320355
]
321356
},
322357
{
@@ -338,7 +373,7 @@
338373
}
339374
],
340375
"source": [
341-
"-1 * ( ((38.0 / 112)* np.log2(38.0/112)) + ((40.0 / 112)* np.log2(40.0/112)) + ((34.0 / 112)* np.log2(34.0/112)))"
376+
"-1*( ((38.0 / 112)* np.log2(38.0/112)) + ((40.0 / 112)* np.log2(40.0/112)) + ((34.0 / 112)* np.log2(34.0/112)) )"
342377
]
343378
},
344379
{
@@ -367,7 +402,7 @@
367402
}
368403
],
369404
"source": [
370-
"-1 * ( ((38.0 / 38.0)* np.log2(38.0/38.0)) )"
405+
"-1*(((38.0 / 38.0)* np.log2(38.0/38.0)))"
371406
]
372407
},
373408
{
@@ -396,7 +431,7 @@
396431
}
397432
],
398433
"source": [
399-
"-1 * ( ((40.0 / 74.0)* np.log2(40.0/74.0)) + ((34.0 / 74.0)* np.log2(34.0/74.0)) )"
434+
"-1*( ((40.0 / 74.0)* np.log2(40.0/74.0)) + ((34.0 / 74.0)* np.log2(34.0/74.0)) )"
400435
]
401436
}
402437
],

0 commit comments

Comments
 (0)