Skip to content

Commit de3f7d3

Browse files
committed
Pipeline for PCA
1 parent 12a369a commit de3f7d3

File tree

2 files changed

+284
-43
lines changed

2 files changed

+284
-43
lines changed
Lines changed: 42 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -16,19 +16,19 @@
1616
},
1717
{
1818
"cell_type": "code",
19-
"execution_count": 45,
20-
"metadata": {
21-
"collapsed": true
22-
},
19+
"execution_count": 6,
20+
"metadata": {},
2321
"outputs": [],
2422
"source": [
23+
"from sklearn.datasets import fetch_mldata\n",
24+
"\n",
2525
"# Change data_home to wherever you want to download your data\n",
2626
"mnist = fetch_mldata('MNIST original', data_home='~/Desktop/alternativeData')"
2727
]
2828
},
2929
{
3030
"cell_type": "code",
31-
"execution_count": 46,
31+
"execution_count": 7,
3232
"metadata": {},
3333
"outputs": [
3434
{
@@ -46,7 +46,7 @@
4646
" 'target': array([ 0., 0., 0., ..., 9., 9., 9.])}"
4747
]
4848
},
49-
"execution_count": 46,
49+
"execution_count": 7,
5050
"metadata": {},
5151
"output_type": "execute_result"
5252
}
@@ -57,7 +57,7 @@
5757
},
5858
{
5959
"cell_type": "code",
60-
"execution_count": 47,
60+
"execution_count": 8,
6161
"metadata": {},
6262
"outputs": [
6363
{
@@ -66,7 +66,7 @@
6666
"(70000, 784)"
6767
]
6868
},
69-
"execution_count": 47,
69+
"execution_count": 8,
7070
"metadata": {},
7171
"output_type": "execute_result"
7272
}
@@ -78,7 +78,7 @@
7878
},
7979
{
8080
"cell_type": "code",
81-
"execution_count": 48,
81+
"execution_count": 9,
8282
"metadata": {},
8383
"outputs": [
8484
{
@@ -87,7 +87,7 @@
8787
"(70000,)"
8888
]
8989
},
90-
"execution_count": 48,
90+
"execution_count": 9,
9191
"metadata": {},
9292
"output_type": "execute_result"
9393
}
@@ -106,12 +106,12 @@
106106
},
107107
{
108108
"cell_type": "code",
109-
"execution_count": 49,
110-
"metadata": {
111-
"collapsed": true
112-
},
109+
"execution_count": 11,
110+
"metadata": {},
113111
"outputs": [],
114112
"source": [
113+
"from sklearn.model_selection import train_test_split\n",
114+
"\n",
115115
"# test_size: what proportion of original data is used for test set\n",
116116
"train_img, test_img, train_lbl, test_lbl = train_test_split(\n",
117117
" mnist.data, mnist.target, test_size=1/7.0, random_state=0)"
@@ -140,15 +140,23 @@
140140
},
141141
{
142142
"cell_type": "code",
143-
"execution_count": 50,
144-
"metadata": {
145-
"collapsed": true
146-
},
147-
"outputs": [],
143+
"execution_count": 12,
144+
"metadata": {},
145+
"outputs": [
146+
{
147+
"name": "stderr",
148+
"output_type": "stream",
149+
"text": [
150+
"/Users/mgalarny/anaconda2/lib/python2.7/site-packages/sklearn/utils/validation.py:444: DataConversionWarning: Data with input dtype uint8 was converted to float64 by StandardScaler.\n",
151+
" warnings.warn(msg, DataConversionWarning)\n"
152+
]
153+
}
154+
],
148155
"source": [
149156
"from sklearn.preprocessing import StandardScaler\n",
150157
"scaler = StandardScaler()\n",
151-
"train_img = scaler.fit_transform(train_img)\n",
158+
"scaler.fit(train_img)\n",
159+
"train_img = scaler.transform(train_img)\n",
152160
"test_img = scaler.transform(test_img)"
153161
]
154162
},
@@ -168,7 +176,7 @@
168176
},
169177
{
170178
"cell_type": "code",
171-
"execution_count": 51,
179+
"execution_count": 13,
172180
"metadata": {
173181
"collapsed": true
174182
},
@@ -186,7 +194,7 @@
186194
},
187195
{
188196
"cell_type": "code",
189-
"execution_count": 52,
197+
"execution_count": 14,
190198
"metadata": {
191199
"collapsed": true
192200
},
@@ -204,7 +212,7 @@
204212
},
205213
{
206214
"cell_type": "code",
207-
"execution_count": 53,
215+
"execution_count": 15,
208216
"metadata": {},
209217
"outputs": [
210218
{
@@ -214,7 +222,7 @@
214222
" svd_solver='auto', tol=0.0, whiten=False)"
215223
]
216224
},
217-
"execution_count": 53,
225+
"execution_count": 15,
218226
"metadata": {},
219227
"output_type": "execute_result"
220228
}
@@ -232,7 +240,7 @@
232240
},
233241
{
234242
"cell_type": "code",
235-
"execution_count": 54,
243+
"execution_count": 16,
236244
"metadata": {
237245
"collapsed": true
238246
},
@@ -258,7 +266,7 @@
258266
},
259267
{
260268
"cell_type": "code",
261-
"execution_count": 55,
269+
"execution_count": 17,
262270
"metadata": {
263271
"collapsed": true
264272
},
@@ -276,7 +284,7 @@
276284
},
277285
{
278286
"cell_type": "code",
279-
"execution_count": 56,
287+
"execution_count": 18,
280288
"metadata": {
281289
"collapsed": true
282290
},
@@ -304,7 +312,7 @@
304312
},
305313
{
306314
"cell_type": "code",
307-
"execution_count": 57,
315+
"execution_count": 19,
308316
"metadata": {},
309317
"outputs": [
310318
{
@@ -316,7 +324,7 @@
316324
" verbose=0, warm_start=False)"
317325
]
318326
},
319-
"execution_count": 57,
327+
"execution_count": 19,
320328
"metadata": {},
321329
"output_type": "execute_result"
322330
}
@@ -341,7 +349,7 @@
341349
},
342350
{
343351
"cell_type": "code",
344-
"execution_count": 58,
352+
"execution_count": 20,
345353
"metadata": {},
346354
"outputs": [
347355
{
@@ -350,7 +358,7 @@
350358
"array([ 1.])"
351359
]
352360
},
353-
"execution_count": 58,
361+
"execution_count": 20,
354362
"metadata": {},
355363
"output_type": "execute_result"
356364
}
@@ -363,7 +371,7 @@
363371
},
364372
{
365373
"cell_type": "code",
366-
"execution_count": 59,
374+
"execution_count": 21,
367375
"metadata": {},
368376
"outputs": [
369377
{
@@ -372,7 +380,7 @@
372380
"array([ 1., 9., 2., 2., 7., 1., 8., 3., 3., 7.])"
373381
]
374382
},
375-
"execution_count": 59,
383+
"execution_count": 21,
376384
"metadata": {},
377385
"output_type": "execute_result"
378386
}
@@ -405,7 +413,7 @@
405413
},
406414
{
407415
"cell_type": "code",
408-
"execution_count": 60,
416+
"execution_count": 23,
409417
"metadata": {},
410418
"outputs": [
411419
{
@@ -420,15 +428,6 @@
420428
"score = logisticRegr.score(test_img, test_lbl)\n",
421429
"print(score)"
422430
]
423-
},
424-
{
425-
"cell_type": "code",
426-
"execution_count": null,
427-
"metadata": {
428-
"collapsed": true
429-
},
430-
"outputs": [],
431-
"source": []
432431
}
433432
],
434433
"metadata": {

0 commit comments

Comments
 (0)