Skip to content

Commit 12a369a

Browse files
committed
Updated blog post for standard scaler
1 parent fcbe55e commit 12a369a

File tree

1 file changed

+47
-104
lines changed

1 file changed

+47
-104
lines changed

Sklearn/PCA/PCA_MNIST_Logistic_Regression_Speeding_Machine_Learning.ipynb

Lines changed: 47 additions & 104 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
},
1717
{
1818
"cell_type": "code",
19-
"execution_count": 31,
19+
"execution_count": 45,
2020
"metadata": {
2121
"collapsed": true
2222
},
@@ -28,7 +28,7 @@
2828
},
2929
{
3030
"cell_type": "code",
31-
"execution_count": 32,
31+
"execution_count": 46,
3232
"metadata": {},
3333
"outputs": [
3434
{
@@ -46,7 +46,7 @@
4646
" 'target': array([ 0., 0., 0., ..., 9., 9., 9.])}"
4747
]
4848
},
49-
"execution_count": 32,
49+
"execution_count": 46,
5050
"metadata": {},
5151
"output_type": "execute_result"
5252
}
@@ -57,7 +57,7 @@
5757
},
5858
{
5959
"cell_type": "code",
60-
"execution_count": 33,
60+
"execution_count": 47,
6161
"metadata": {},
6262
"outputs": [
6363
{
@@ -66,7 +66,7 @@
6666
"(70000, 784)"
6767
]
6868
},
69-
"execution_count": 33,
69+
"execution_count": 47,
7070
"metadata": {},
7171
"output_type": "execute_result"
7272
}
@@ -78,7 +78,7 @@
7878
},
7979
{
8080
"cell_type": "code",
81-
"execution_count": 34,
81+
"execution_count": 48,
8282
"metadata": {},
8383
"outputs": [
8484
{
@@ -87,7 +87,7 @@
8787
"(70000,)"
8888
]
8989
},
90-
"execution_count": 34,
90+
"execution_count": 48,
9191
"metadata": {},
9292
"output_type": "execute_result"
9393
}
@@ -97,39 +97,6 @@
9797
"mnist.target.shape"
9898
]
9999
},
100-
{
101-
"cell_type": "markdown",
102-
"metadata": {},
103-
"source": [
104-
"## Standardizing the Data"
105-
]
106-
},
107-
{
108-
"cell_type": "markdown",
109-
"metadata": {},
110-
"source": [
111-
"Since PCA yields a feature subspace that maximizes the variance along the axes, it makes sense to standardize the data, especially, if it was measured on different scales."
112-
]
113-
},
114-
{
115-
"cell_type": "markdown",
116-
"metadata": {},
117-
"source": [
118-
"Notebook going over the importance of feature Scaling: http://scikit-learn.org/stable/auto_examples/preprocessing/plot_scaling_importance.html#sphx-glr-auto-examples-preprocessing-plot-scaling-importance-py\n"
119-
]
120-
},
121-
{
122-
"cell_type": "code",
123-
"execution_count": 35,
124-
"metadata": {
125-
"collapsed": true
126-
},
127-
"outputs": [],
128-
"source": [
129-
"# Standardize features by removing the mean and scaling to unit variance\n",
130-
"mnist.data = StandardScaler().fit_transform(mnist.data)"
131-
]
132-
},
133100
{
134101
"cell_type": "markdown",
135102
"metadata": {},
@@ -139,7 +106,7 @@
139106
},
140107
{
141108
"cell_type": "code",
142-
"execution_count": 36,
109+
"execution_count": 49,
143110
"metadata": {
144111
"collapsed": true
145112
},
@@ -151,71 +118,38 @@
151118
]
152119
},
153120
{
154-
"cell_type": "code",
155-
"execution_count": 37,
121+
"cell_type": "markdown",
156122
"metadata": {},
157-
"outputs": [
158-
{
159-
"name": "stdout",
160-
"output_type": "stream",
161-
"text": [
162-
"(60000, 784)\n"
163-
]
164-
}
165-
],
166123
"source": [
167-
"print(train_img.shape)"
124+
"## Standardizing the Data"
168125
]
169126
},
170127
{
171-
"cell_type": "code",
172-
"execution_count": 38,
128+
"cell_type": "markdown",
173129
"metadata": {},
174-
"outputs": [
175-
{
176-
"name": "stdout",
177-
"output_type": "stream",
178-
"text": [
179-
"(60000,)\n"
180-
]
181-
}
182-
],
183130
"source": [
184-
"print(train_lbl.shape)"
131+
"Since PCA yields a feature subspace that maximizes the variance along the axes, it makes sense to standardize the data, especially if the features were measured on different scales."
185132
]
186133
},
187134
{
188-
"cell_type": "code",
189-
"execution_count": 39,
135+
"cell_type": "markdown",
190136
"metadata": {},
191-
"outputs": [
192-
{
193-
"name": "stdout",
194-
"output_type": "stream",
195-
"text": [
196-
"(10000, 784)\n"
197-
]
198-
}
199-
],
200137
"source": [
201-
"print(test_img.shape)"
138+
"scikit-learn example showing the importance of feature scaling: http://scikit-learn.org/stable/auto_examples/preprocessing/plot_scaling_importance.html#sphx-glr-auto-examples-preprocessing-plot-scaling-importance-py"
202139
]
203140
},
204141
{
205142
"cell_type": "code",
206-
"execution_count": 40,
207-
"metadata": {},
208-
"outputs": [
209-
{
210-
"name": "stdout",
211-
"output_type": "stream",
212-
"text": [
213-
"(10000,)\n"
214-
]
215-
}
216-
],
143+
"execution_count": 50,
144+
"metadata": {
145+
"collapsed": true
146+
},
147+
"outputs": [],
217148
"source": [
218-
"print(test_lbl.shape)"
149+
"from sklearn.preprocessing import StandardScaler\n",
150+
"scaler = StandardScaler()\n",
151+
"train_img = scaler.fit_transform(train_img)\n",
152+
"test_img = scaler.transform(test_img)"
219153
]
220154
},
221155
{
@@ -234,7 +168,7 @@
234168
},
235169
{
236170
"cell_type": "code",
237-
"execution_count": 41,
171+
"execution_count": 51,
238172
"metadata": {
239173
"collapsed": true
240174
},
@@ -252,7 +186,7 @@
252186
},
253187
{
254188
"cell_type": "code",
255-
"execution_count": 42,
189+
"execution_count": 52,
256190
"metadata": {
257191
"collapsed": true
258192
},
@@ -270,7 +204,7 @@
270204
},
271205
{
272206
"cell_type": "code",
273-
"execution_count": 51,
207+
"execution_count": 53,
274208
"metadata": {},
275209
"outputs": [
276210
{
@@ -280,7 +214,7 @@
280214
" svd_solver='auto', tol=0.0, whiten=False)"
281215
]
282216
},
283-
"execution_count": 51,
217+
"execution_count": 53,
284218
"metadata": {},
285219
"output_type": "execute_result"
286220
}
@@ -298,7 +232,7 @@
298232
},
299233
{
300234
"cell_type": "code",
301-
"execution_count": 15,
235+
"execution_count": 54,
302236
"metadata": {
303237
"collapsed": true
304238
},
@@ -324,7 +258,7 @@
324258
},
325259
{
326260
"cell_type": "code",
327-
"execution_count": 16,
261+
"execution_count": 55,
328262
"metadata": {
329263
"collapsed": true
330264
},
@@ -342,7 +276,7 @@
342276
},
343277
{
344278
"cell_type": "code",
345-
"execution_count": 17,
279+
"execution_count": 56,
346280
"metadata": {
347281
"collapsed": true
348282
},
@@ -370,7 +304,7 @@
370304
},
371305
{
372306
"cell_type": "code",
373-
"execution_count": 18,
307+
"execution_count": 57,
374308
"metadata": {},
375309
"outputs": [
376310
{
@@ -382,7 +316,7 @@
382316
" verbose=0, warm_start=False)"
383317
]
384318
},
385-
"execution_count": 18,
319+
"execution_count": 57,
386320
"metadata": {},
387321
"output_type": "execute_result"
388322
}
@@ -407,7 +341,7 @@
407341
},
408342
{
409343
"cell_type": "code",
410-
"execution_count": 21,
344+
"execution_count": 58,
411345
"metadata": {},
412346
"outputs": [
413347
{
@@ -416,7 +350,7 @@
416350
"array([ 1.])"
417351
]
418352
},
419-
"execution_count": 21,
353+
"execution_count": 58,
420354
"metadata": {},
421355
"output_type": "execute_result"
422356
}
@@ -429,7 +363,7 @@
429363
},
430364
{
431365
"cell_type": "code",
432-
"execution_count": 22,
366+
"execution_count": 59,
433367
"metadata": {},
434368
"outputs": [
435369
{
@@ -438,7 +372,7 @@
438372
"array([ 1., 9., 2., 2., 7., 1., 8., 3., 3., 7.])"
439373
]
440374
},
441-
"execution_count": 22,
375+
"execution_count": 59,
442376
"metadata": {},
443377
"output_type": "execute_result"
444378
}
@@ -471,21 +405,30 @@
471405
},
472406
{
473407
"cell_type": "code",
474-
"execution_count": 30,
408+
"execution_count": 60,
475409
"metadata": {},
476410
"outputs": [
477411
{
478412
"name": "stdout",
479413
"output_type": "stream",
480414
"text": [
481-
"0.9195\n"
415+
"0.92\n"
482416
]
483417
}
484418
],
485419
"source": [
486420
"score = logisticRegr.score(test_img, test_lbl)\n",
487421
"print(score)"
488422
]
423+
},
424+
{
425+
"cell_type": "code",
426+
"execution_count": null,
427+
"metadata": {
428+
"collapsed": true
429+
},
430+
"outputs": [],
431+
"source": []
489432
}
490433
],
491434
"metadata": {

0 commit comments

Comments
 (0)