|
16 | 16 | }, |
17 | 17 | { |
18 | 18 | "cell_type": "code", |
19 | | - "execution_count": 45, |
20 | | - "metadata": { |
21 | | - "collapsed": true |
22 | | - }, |
| 19 | + "execution_count": 6, |
| 20 | + "metadata": {}, |
23 | 21 | "outputs": [], |
24 | 22 | "source": [ |
| 23 | + "from sklearn.datasets import fetch_mldata\n", |
| 24 | + "\n", |
25 | 25 | "# Change data_home to wherever you want to download your data\n", |
26 | 26 | "mnist = fetch_mldata('MNIST original', data_home='~/Desktop/alternativeData')" |
27 | 27 | ] |
28 | 28 | }, |
29 | 29 | { |
30 | 30 | "cell_type": "code", |
31 | | - "execution_count": 46, |
| 31 | + "execution_count": 7, |
32 | 32 | "metadata": {}, |
33 | 33 | "outputs": [ |
34 | 34 | { |
|
46 | 46 | " 'target': array([ 0., 0., 0., ..., 9., 9., 9.])}" |
47 | 47 | ] |
48 | 48 | }, |
49 | | - "execution_count": 46, |
| 49 | + "execution_count": 7, |
50 | 50 | "metadata": {}, |
51 | 51 | "output_type": "execute_result" |
52 | 52 | } |
|
57 | 57 | }, |
58 | 58 | { |
59 | 59 | "cell_type": "code", |
60 | | - "execution_count": 47, |
| 60 | + "execution_count": 8, |
61 | 61 | "metadata": {}, |
62 | 62 | "outputs": [ |
63 | 63 | { |
|
66 | 66 | "(70000, 784)" |
67 | 67 | ] |
68 | 68 | }, |
69 | | - "execution_count": 47, |
| 69 | + "execution_count": 8, |
70 | 70 | "metadata": {}, |
71 | 71 | "output_type": "execute_result" |
72 | 72 | } |
|
78 | 78 | }, |
79 | 79 | { |
80 | 80 | "cell_type": "code", |
81 | | - "execution_count": 48, |
| 81 | + "execution_count": 9, |
82 | 82 | "metadata": {}, |
83 | 83 | "outputs": [ |
84 | 84 | { |
|
87 | 87 | "(70000,)" |
88 | 88 | ] |
89 | 89 | }, |
90 | | - "execution_count": 48, |
| 90 | + "execution_count": 9, |
91 | 91 | "metadata": {}, |
92 | 92 | "output_type": "execute_result" |
93 | 93 | } |
|
106 | 106 | }, |
107 | 107 | { |
108 | 108 | "cell_type": "code", |
109 | | - "execution_count": 49, |
110 | | - "metadata": { |
111 | | - "collapsed": true |
112 | | - }, |
| 109 | + "execution_count": 11, |
| 110 | + "metadata": {}, |
113 | 111 | "outputs": [], |
114 | 112 | "source": [ |
| 113 | + "from sklearn.model_selection import train_test_split\n", |
| 114 | + "\n", |
115 | 115 | "# test_size: what proportion of original data is used for test set\n", |
116 | 116 | "train_img, test_img, train_lbl, test_lbl = train_test_split(\n", |
117 | 117 | " mnist.data, mnist.target, test_size=1/7.0, random_state=0)" |
|
140 | 140 | }, |
141 | 141 | { |
142 | 142 | "cell_type": "code", |
143 | | - "execution_count": 50, |
144 | | - "metadata": { |
145 | | - "collapsed": true |
146 | | - }, |
147 | | - "outputs": [], |
| 143 | + "execution_count": 12, |
| 144 | + "metadata": {}, |
| 145 | + "outputs": [ |
| 146 | + { |
| 147 | + "name": "stderr", |
| 148 | + "output_type": "stream", |
| 149 | + "text": [ |
| 150 | + "/Users/mgalarny/anaconda2/lib/python2.7/site-packages/sklearn/utils/validation.py:444: DataConversionWarning: Data with input dtype uint8 was converted to float64 by StandardScaler.\n", |
| 151 | + " warnings.warn(msg, DataConversionWarning)\n" |
| 152 | + ] |
| 153 | + } |
| 154 | + ], |
148 | 155 | "source": [ |
149 | 156 | "from sklearn.preprocessing import StandardScaler\n", |
150 | 157 | "scaler = StandardScaler()\n", |
151 | | - "train_img = scaler.fit_transform(train_img)\n", |
| 158 | + "scaler.fit(train_img)\n", |
| 159 | + "train_img = scaler.transform(train_img)\n", |
152 | 160 | "test_img = scaler.transform(test_img)" |
153 | 161 | ] |
154 | 162 | }, |
|
168 | 176 | }, |
169 | 177 | { |
170 | 178 | "cell_type": "code", |
171 | | - "execution_count": 51, |
| 179 | + "execution_count": 13, |
172 | 180 | "metadata": { |
173 | 181 | "collapsed": true |
174 | 182 | }, |
|
186 | 194 | }, |
187 | 195 | { |
188 | 196 | "cell_type": "code", |
189 | | - "execution_count": 52, |
| 197 | + "execution_count": 14, |
190 | 198 | "metadata": { |
191 | 199 | "collapsed": true |
192 | 200 | }, |
|
204 | 212 | }, |
205 | 213 | { |
206 | 214 | "cell_type": "code", |
207 | | - "execution_count": 53, |
| 215 | + "execution_count": 15, |
208 | 216 | "metadata": {}, |
209 | 217 | "outputs": [ |
210 | 218 | { |
|
214 | 222 | " svd_solver='auto', tol=0.0, whiten=False)" |
215 | 223 | ] |
216 | 224 | }, |
217 | | - "execution_count": 53, |
| 225 | + "execution_count": 15, |
218 | 226 | "metadata": {}, |
219 | 227 | "output_type": "execute_result" |
220 | 228 | } |
|
232 | 240 | }, |
233 | 241 | { |
234 | 242 | "cell_type": "code", |
235 | | - "execution_count": 54, |
| 243 | + "execution_count": 16, |
236 | 244 | "metadata": { |
237 | 245 | "collapsed": true |
238 | 246 | }, |
|
258 | 266 | }, |
259 | 267 | { |
260 | 268 | "cell_type": "code", |
261 | | - "execution_count": 55, |
| 269 | + "execution_count": 17, |
262 | 270 | "metadata": { |
263 | 271 | "collapsed": true |
264 | 272 | }, |
|
276 | 284 | }, |
277 | 285 | { |
278 | 286 | "cell_type": "code", |
279 | | - "execution_count": 56, |
| 287 | + "execution_count": 18, |
280 | 288 | "metadata": { |
281 | 289 | "collapsed": true |
282 | 290 | }, |
|
304 | 312 | }, |
305 | 313 | { |
306 | 314 | "cell_type": "code", |
307 | | - "execution_count": 57, |
| 315 | + "execution_count": 19, |
308 | 316 | "metadata": {}, |
309 | 317 | "outputs": [ |
310 | 318 | { |
|
316 | 324 | " verbose=0, warm_start=False)" |
317 | 325 | ] |
318 | 326 | }, |
319 | | - "execution_count": 57, |
| 327 | + "execution_count": 19, |
320 | 328 | "metadata": {}, |
321 | 329 | "output_type": "execute_result" |
322 | 330 | } |
|
341 | 349 | }, |
342 | 350 | { |
343 | 351 | "cell_type": "code", |
344 | | - "execution_count": 58, |
| 352 | + "execution_count": 20, |
345 | 353 | "metadata": {}, |
346 | 354 | "outputs": [ |
347 | 355 | { |
|
350 | 358 | "array([ 1.])" |
351 | 359 | ] |
352 | 360 | }, |
353 | | - "execution_count": 58, |
| 361 | + "execution_count": 20, |
354 | 362 | "metadata": {}, |
355 | 363 | "output_type": "execute_result" |
356 | 364 | } |
|
363 | 371 | }, |
364 | 372 | { |
365 | 373 | "cell_type": "code", |
366 | | - "execution_count": 59, |
| 374 | + "execution_count": 21, |
367 | 375 | "metadata": {}, |
368 | 376 | "outputs": [ |
369 | 377 | { |
|
372 | 380 | "array([ 1., 9., 2., 2., 7., 1., 8., 3., 3., 7.])" |
373 | 381 | ] |
374 | 382 | }, |
375 | | - "execution_count": 59, |
| 383 | + "execution_count": 21, |
376 | 384 | "metadata": {}, |
377 | 385 | "output_type": "execute_result" |
378 | 386 | } |
|
405 | 413 | }, |
406 | 414 | { |
407 | 415 | "cell_type": "code", |
408 | | - "execution_count": 60, |
| 416 | + "execution_count": 23, |
409 | 417 | "metadata": {}, |
410 | 418 | "outputs": [ |
411 | 419 | { |
|
420 | 428 | "score = logisticRegr.score(test_img, test_lbl)\n", |
421 | 429 | "print(score)" |
422 | 430 | ] |
423 | | - }, |
424 | | - { |
425 | | - "cell_type": "code", |
426 | | - "execution_count": null, |
427 | | - "metadata": { |
428 | | - "collapsed": true |
429 | | - }, |
430 | | - "outputs": [], |
431 | | - "source": [] |
432 | 431 | } |
433 | 432 | ], |
434 | 433 | "metadata": { |
|
0 commit comments