Skip to content

Commit 494e1fb

Browse files
committed
Remove fnlwgt and use PCA instead of TSNE
1 parent 4dbb1da commit 494e1fb

8 files changed

+32788
-32760
lines changed

Chapter 6/Activity 1 - Train and predict the income of a person.ipynb

Lines changed: 25 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
"cells": [
33
{
44
"cell_type": "code",
5-
"execution_count": 25,
5+
"execution_count": 35,
66
"metadata": {},
77
"outputs": [],
88
"source": [
@@ -15,18 +15,18 @@
1515
},
1616
{
1717
"cell_type": "code",
18-
"execution_count": 32,
18+
"execution_count": 36,
1919
"metadata": {},
2020
"outputs": [],
2121
"source": [
22-
"data = pd.read_csv(\"../data/adult-data.csv\", names=['age', 'workclass', 'fnlwgt', 'education-num',\n",
22+
"data = pd.read_csv(\"data/adult-data.csv\", names=['age', 'workclass', 'education-num',\n",
2323
" 'occupation', 'capital-gain', 'capital-loss',\n",
2424
" 'hours-per-week', 'income'])"
2525
]
2626
},
2727
{
2828
"cell_type": "code",
29-
"execution_count": 33,
29+
"execution_count": 37,
3030
"metadata": {},
3131
"outputs": [
3232
{
@@ -52,7 +52,6 @@
5252
" <th></th>\n",
5353
" <th>age</th>\n",
5454
" <th>workclass</th>\n",
55-
" <th>fnlwgt</th>\n",
5655
" <th>education-num</th>\n",
5756
" <th>occupation</th>\n",
5857
" <th>capital-gain</th>\n",
@@ -66,7 +65,6 @@
6665
" <th>32556</th>\n",
6766
" <td>27</td>\n",
6867
" <td>Private</td>\n",
69-
" <td>257302</td>\n",
7068
" <td>12</td>\n",
7169
" <td>Tech-support</td>\n",
7270
" <td>0</td>\n",
@@ -78,7 +76,6 @@
7876
" <th>32557</th>\n",
7977
" <td>40</td>\n",
8078
" <td>Private</td>\n",
81-
" <td>154374</td>\n",
8279
" <td>9</td>\n",
8380
" <td>Machine-op-inspct</td>\n",
8481
" <td>0</td>\n",
@@ -90,7 +87,6 @@
9087
" <th>32558</th>\n",
9188
" <td>58</td>\n",
9289
" <td>Private</td>\n",
93-
" <td>151910</td>\n",
9490
" <td>9</td>\n",
9591
" <td>Adm-clerical</td>\n",
9692
" <td>0</td>\n",
@@ -102,7 +98,6 @@
10298
" <th>32559</th>\n",
10399
" <td>22</td>\n",
104100
" <td>Private</td>\n",
105-
" <td>201490</td>\n",
106101
" <td>9</td>\n",
107102
" <td>Adm-clerical</td>\n",
108103
" <td>0</td>\n",
@@ -114,7 +109,6 @@
114109
" <th>32560</th>\n",
115110
" <td>52</td>\n",
116111
" <td>Self-emp-inc</td>\n",
117-
" <td>287927</td>\n",
118112
" <td>9</td>\n",
119113
" <td>Exec-managerial</td>\n",
120114
" <td>15024</td>\n",
@@ -127,22 +121,22 @@
127121
"</div>"
128122
],
129123
"text/plain": [
130-
" age workclass fnlwgt education-num occupation \\\n",
131-
"32556 27 Private 257302 12 Tech-support \n",
132-
"32557 40 Private 154374 9 Machine-op-inspct \n",
133-
"32558 58 Private 151910 9 Adm-clerical \n",
134-
"32559 22 Private 201490 9 Adm-clerical \n",
135-
"32560 52 Self-emp-inc 287927 9 Exec-managerial \n",
124+
" age workclass education-num occupation capital-gain \\\n",
125+
"32556 27 Private 12 Tech-support 0 \n",
126+
"32557 40 Private 9 Machine-op-inspct 0 \n",
127+
"32558 58 Private 9 Adm-clerical 0 \n",
128+
"32559 22 Private 9 Adm-clerical 0 \n",
129+
"32560 52 Self-emp-inc 9 Exec-managerial 15024 \n",
136130
"\n",
137-
" capital-gain capital-loss hours-per-week income \n",
138-
"32556 0 0 38 <=50K \n",
139-
"32557 0 0 40 >50K \n",
140-
"32558 0 0 40 <=50K \n",
141-
"32559 0 0 20 <=50K \n",
142-
"32560 15024 0 40 >50K "
131+
" capital-loss hours-per-week income \n",
132+
"32556 0 38 <=50K \n",
133+
"32557 0 40 >50K \n",
134+
"32558 0 40 <=50K \n",
135+
"32559 0 20 <=50K \n",
136+
"32560 0 40 >50K "
143137
]
144138
},
145-
"execution_count": 33,
139+
"execution_count": 37,
146140
"metadata": {},
147141
"output_type": "execute_result"
148142
}
@@ -153,7 +147,7 @@
153147
},
154148
{
155149
"cell_type": "code",
156-
"execution_count": 34,
150+
"execution_count": 38,
157151
"metadata": {},
158152
"outputs": [],
159153
"source": [
@@ -164,7 +158,7 @@
164158
},
165159
{
166160
"cell_type": "code",
167-
"execution_count": 35,
161+
"execution_count": 39,
168162
"metadata": {},
169163
"outputs": [],
170164
"source": [
@@ -175,7 +169,7 @@
175169
},
176170
{
177171
"cell_type": "code",
178-
"execution_count": 36,
172+
"execution_count": 40,
179173
"metadata": {},
180174
"outputs": [],
181175
"source": [
@@ -185,7 +179,7 @@
185179
},
186180
{
187181
"cell_type": "code",
188-
"execution_count": 37,
182+
"execution_count": 41,
189183
"metadata": {},
190184
"outputs": [],
191185
"source": [
@@ -195,7 +189,7 @@
195189
},
196190
{
197191
"cell_type": "code",
198-
"execution_count": 38,
192+
"execution_count": 42,
199193
"metadata": {},
200194
"outputs": [],
201195
"source": [
@@ -206,7 +200,7 @@
206200
},
207201
{
208202
"cell_type": "code",
209-
"execution_count": 39,
203+
"execution_count": 43,
210204
"metadata": {},
211205
"outputs": [],
212206
"source": [
@@ -215,14 +209,14 @@
215209
},
216210
{
217211
"cell_type": "code",
218-
"execution_count": 40,
212+
"execution_count": 44,
219213
"metadata": {},
220214
"outputs": [
221215
{
222216
"name": "stdout",
223217
"output_type": "stream",
224218
"text": [
225-
"Accuracy: 83.63%\n"
219+
"Accuracy: 83.66%\n"
226220
]
227221
}
228222
],

0 commit comments

Comments
 (0)