|
16 | 16 | }, |
17 | 17 | { |
18 | 18 | "cell_type": "code", |
19 | | - "execution_count": 31, |
| 19 | + "execution_count": 45, |
20 | 20 | "metadata": { |
21 | 21 | "collapsed": true |
22 | 22 | }, |
|
28 | 28 | }, |
29 | 29 | { |
30 | 30 | "cell_type": "code", |
31 | | - "execution_count": 32, |
| 31 | + "execution_count": 46, |
32 | 32 | "metadata": {}, |
33 | 33 | "outputs": [ |
34 | 34 | { |
|
46 | 46 | " 'target': array([ 0., 0., 0., ..., 9., 9., 9.])}" |
47 | 47 | ] |
48 | 48 | }, |
49 | | - "execution_count": 32, |
| 49 | + "execution_count": 46, |
50 | 50 | "metadata": {}, |
51 | 51 | "output_type": "execute_result" |
52 | 52 | } |
|
57 | 57 | }, |
58 | 58 | { |
59 | 59 | "cell_type": "code", |
60 | | - "execution_count": 33, |
| 60 | + "execution_count": 47, |
61 | 61 | "metadata": {}, |
62 | 62 | "outputs": [ |
63 | 63 | { |
|
66 | 66 | "(70000, 784)" |
67 | 67 | ] |
68 | 68 | }, |
69 | | - "execution_count": 33, |
| 69 | + "execution_count": 47, |
70 | 70 | "metadata": {}, |
71 | 71 | "output_type": "execute_result" |
72 | 72 | } |
|
78 | 78 | }, |
79 | 79 | { |
80 | 80 | "cell_type": "code", |
81 | | - "execution_count": 34, |
| 81 | + "execution_count": 48, |
82 | 82 | "metadata": {}, |
83 | 83 | "outputs": [ |
84 | 84 | { |
|
87 | 87 | "(70000,)" |
88 | 88 | ] |
89 | 89 | }, |
90 | | - "execution_count": 34, |
| 90 | + "execution_count": 48, |
91 | 91 | "metadata": {}, |
92 | 92 | "output_type": "execute_result" |
93 | 93 | } |
|
97 | 97 | "mnist.target.shape" |
98 | 98 | ] |
99 | 99 | }, |
100 | | - { |
101 | | - "cell_type": "markdown", |
102 | | - "metadata": {}, |
103 | | - "source": [ |
104 | | - "## Standardizing the Data" |
105 | | - ] |
106 | | - }, |
107 | | - { |
108 | | - "cell_type": "markdown", |
109 | | - "metadata": {}, |
110 | | - "source": [ |
111 | | - "Since PCA yields a feature subspace that maximizes the variance along the axes, it makes sense to standardize the data, especially, if it was measured on different scales." |
112 | | - ] |
113 | | - }, |
114 | | - { |
115 | | - "cell_type": "markdown", |
116 | | - "metadata": {}, |
117 | | - "source": [ |
118 | | - "Notebook going over the importance of feature Scaling: http://scikit-learn.org/stable/auto_examples/preprocessing/plot_scaling_importance.html#sphx-glr-auto-examples-preprocessing-plot-scaling-importance-py\n" |
119 | | - ] |
120 | | - }, |
121 | | - { |
122 | | - "cell_type": "code", |
123 | | - "execution_count": 35, |
124 | | - "metadata": { |
125 | | - "collapsed": true |
126 | | - }, |
127 | | - "outputs": [], |
128 | | - "source": [ |
129 | | - "# Standardize features by removing the mean and scaling to unit variance\n", |
130 | | - "mnist.data = StandardScaler().fit_transform(mnist.data)" |
131 | | - ] |
132 | | - }, |
133 | 100 | { |
134 | 101 | "cell_type": "markdown", |
135 | 102 | "metadata": {}, |
|
139 | 106 | }, |
140 | 107 | { |
141 | 108 | "cell_type": "code", |
142 | | - "execution_count": 36, |
| 109 | + "execution_count": 49, |
143 | 110 | "metadata": { |
144 | 111 | "collapsed": true |
145 | 112 | }, |
|
151 | 118 | ] |
152 | 119 | }, |
153 | 120 | { |
154 | | - "cell_type": "code", |
155 | | - "execution_count": 37, |
| 121 | + "cell_type": "markdown", |
156 | 122 | "metadata": {}, |
157 | | - "outputs": [ |
158 | | - { |
159 | | - "name": "stdout", |
160 | | - "output_type": "stream", |
161 | | - "text": [ |
162 | | - "(60000, 784)\n" |
163 | | - ] |
164 | | - } |
165 | | - ], |
166 | 123 | "source": [ |
167 | | - "print(train_img.shape)" |
| 124 | + "## Standardizing the Data" |
168 | 125 | ] |
169 | 126 | }, |
170 | 127 | { |
171 | | - "cell_type": "code", |
172 | | - "execution_count": 38, |
| 128 | + "cell_type": "markdown", |
173 | 129 | "metadata": {}, |
174 | | - "outputs": [ |
175 | | - { |
176 | | - "name": "stdout", |
177 | | - "output_type": "stream", |
178 | | - "text": [ |
179 | | - "(60000,)\n" |
180 | | - ] |
181 | | - } |
182 | | - ], |
183 | 130 | "source": [ |
184 | | - "print(train_lbl.shape)" |
| 131 | + "Since PCA yields a feature subspace that maximizes the variance along the axes, it makes sense to standardize the data, especially if it was measured on different scales." |
185 | 132 | ] |
186 | 133 | }, |
187 | 134 | { |
188 | | - "cell_type": "code", |
189 | | - "execution_count": 39, |
| 135 | + "cell_type": "markdown", |
190 | 136 | "metadata": {}, |
191 | | - "outputs": [ |
192 | | - { |
193 | | - "name": "stdout", |
194 | | - "output_type": "stream", |
195 | | - "text": [ |
196 | | - "(10000, 784)\n" |
197 | | - ] |
198 | | - } |
199 | | - ], |
200 | 137 | "source": [ |
201 | | - "print(test_img.shape)" |
| 138 | + "Notebook going over the importance of feature scaling: http://scikit-learn.org/stable/auto_examples/preprocessing/plot_scaling_importance.html#sphx-glr-auto-examples-preprocessing-plot-scaling-importance-py" |
202 | 139 | ] |
203 | 140 | }, |
204 | 141 | { |
205 | 142 | "cell_type": "code", |
206 | | - "execution_count": 40, |
207 | | - "metadata": {}, |
208 | | - "outputs": [ |
209 | | - { |
210 | | - "name": "stdout", |
211 | | - "output_type": "stream", |
212 | | - "text": [ |
213 | | - "(10000,)\n" |
214 | | - ] |
215 | | - } |
216 | | - ], |
| 143 | + "execution_count": 50, |
| 144 | + "metadata": { |
| 145 | + "collapsed": true |
| 146 | + }, |
| 147 | + "outputs": [], |
217 | 148 | "source": [ |
218 | | - "print(test_lbl.shape)" |
| 149 | + "from sklearn.preprocessing import StandardScaler\n", |
| 150 | + "scaler = StandardScaler()\n", |
| 151 | + "train_img = scaler.fit_transform(train_img)\n", |
| 152 | + "test_img = scaler.transform(test_img)" |
219 | 153 | ] |
220 | 154 | }, |
221 | 155 | { |
|
234 | 168 | }, |
235 | 169 | { |
236 | 170 | "cell_type": "code", |
237 | | - "execution_count": 41, |
| 171 | + "execution_count": 51, |
238 | 172 | "metadata": { |
239 | 173 | "collapsed": true |
240 | 174 | }, |
|
252 | 186 | }, |
253 | 187 | { |
254 | 188 | "cell_type": "code", |
255 | | - "execution_count": 42, |
| 189 | + "execution_count": 52, |
256 | 190 | "metadata": { |
257 | 191 | "collapsed": true |
258 | 192 | }, |
|
270 | 204 | }, |
271 | 205 | { |
272 | 206 | "cell_type": "code", |
273 | | - "execution_count": 51, |
| 207 | + "execution_count": 53, |
274 | 208 | "metadata": {}, |
275 | 209 | "outputs": [ |
276 | 210 | { |
|
280 | 214 | " svd_solver='auto', tol=0.0, whiten=False)" |
281 | 215 | ] |
282 | 216 | }, |
283 | | - "execution_count": 51, |
| 217 | + "execution_count": 53, |
284 | 218 | "metadata": {}, |
285 | 219 | "output_type": "execute_result" |
286 | 220 | } |
|
298 | 232 | }, |
299 | 233 | { |
300 | 234 | "cell_type": "code", |
301 | | - "execution_count": 15, |
| 235 | + "execution_count": 54, |
302 | 236 | "metadata": { |
303 | 237 | "collapsed": true |
304 | 238 | }, |
|
324 | 258 | }, |
325 | 259 | { |
326 | 260 | "cell_type": "code", |
327 | | - "execution_count": 16, |
| 261 | + "execution_count": 55, |
328 | 262 | "metadata": { |
329 | 263 | "collapsed": true |
330 | 264 | }, |
|
342 | 276 | }, |
343 | 277 | { |
344 | 278 | "cell_type": "code", |
345 | | - "execution_count": 17, |
| 279 | + "execution_count": 56, |
346 | 280 | "metadata": { |
347 | 281 | "collapsed": true |
348 | 282 | }, |
|
370 | 304 | }, |
371 | 305 | { |
372 | 306 | "cell_type": "code", |
373 | | - "execution_count": 18, |
| 307 | + "execution_count": 57, |
374 | 308 | "metadata": {}, |
375 | 309 | "outputs": [ |
376 | 310 | { |
|
382 | 316 | " verbose=0, warm_start=False)" |
383 | 317 | ] |
384 | 318 | }, |
385 | | - "execution_count": 18, |
| 319 | + "execution_count": 57, |
386 | 320 | "metadata": {}, |
387 | 321 | "output_type": "execute_result" |
388 | 322 | } |
|
407 | 341 | }, |
408 | 342 | { |
409 | 343 | "cell_type": "code", |
410 | | - "execution_count": 21, |
| 344 | + "execution_count": 58, |
411 | 345 | "metadata": {}, |
412 | 346 | "outputs": [ |
413 | 347 | { |
|
416 | 350 | "array([ 1.])" |
417 | 351 | ] |
418 | 352 | }, |
419 | | - "execution_count": 21, |
| 353 | + "execution_count": 58, |
420 | 354 | "metadata": {}, |
421 | 355 | "output_type": "execute_result" |
422 | 356 | } |
|
429 | 363 | }, |
430 | 364 | { |
431 | 365 | "cell_type": "code", |
432 | | - "execution_count": 22, |
| 366 | + "execution_count": 59, |
433 | 367 | "metadata": {}, |
434 | 368 | "outputs": [ |
435 | 369 | { |
|
438 | 372 | "array([ 1., 9., 2., 2., 7., 1., 8., 3., 3., 7.])" |
439 | 373 | ] |
440 | 374 | }, |
441 | | - "execution_count": 22, |
| 375 | + "execution_count": 59, |
442 | 376 | "metadata": {}, |
443 | 377 | "output_type": "execute_result" |
444 | 378 | } |
|
471 | 405 | }, |
472 | 406 | { |
473 | 407 | "cell_type": "code", |
474 | | - "execution_count": 30, |
| 408 | + "execution_count": 60, |
475 | 409 | "metadata": {}, |
476 | 410 | "outputs": [ |
477 | 411 | { |
478 | 412 | "name": "stdout", |
479 | 413 | "output_type": "stream", |
480 | 414 | "text": [ |
481 | | - "0.9195\n" |
| 415 | + "0.92\n" |
482 | 416 | ] |
483 | 417 | } |
484 | 418 | ], |
485 | 419 | "source": [ |
486 | 420 | "score = logisticRegr.score(test_img, test_lbl)\n", |
487 | 421 | "print(score)" |
488 | 422 | ] |
| 423 | + }, |
| 424 | + { |
| 425 | + "cell_type": "code", |
| 426 | + "execution_count": null, |
| 427 | + "metadata": { |
| 428 | + "collapsed": true |
| 429 | + }, |
| 430 | + "outputs": [], |
| 431 | + "source": [] |
489 | 432 | } |
490 | 433 | ], |
491 | 434 | "metadata": { |
|
0 commit comments