Skip to content

Commit 6b04635

Browse files
GaelVaroquaux, amueller
authored and committed
DOC: improve scale_c_example
1 parent 3d02176 commit 6b04635

File tree

1 file changed

+21
-22
lines changed

1 file changed

+21
-22
lines changed

examples/svm/plot_svm_scale_c.py

Lines changed: 21 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -93,26 +93,23 @@
9393

9494
# set up dataset
9595
n_samples = 100
96-
n_features = 1000
96+
n_features = 300
9797

9898
#L1 data (only 5 informative features)
9999
X_1, y_1 = datasets.make_classification(n_samples=n_samples, n_features=n_features,
100100
n_informative=5, random_state=1)
101101

102-
#L2 data
103-
X_2 = 1 + rnd.randn(n_samples, n_features)
104-
coef = np.ones(n_features)
105-
106-
y_2 = np.dot(X_2, coef)
107-
y_2 += .1 * rnd.randn(n_samples) * np.std(y_2)
108-
y_2 = np.sign(y_2 - np.mean(y_2))
102+
#L2 data: non sparse, but less features
103+
y_2 = np.sign(.5 - rnd.rand(n_samples))
104+
X_2 = rnd.randn(n_samples, n_features/5) + y_2[:, np.newaxis]
105+
X_2 += 5 * rnd.randn(n_samples, n_features/5)
109106

110107
clf_sets = [(LinearSVC(penalty='L1', loss='L2', dual=False,
111108
tol=1e-3),
112-
np.logspace(-2.5, -1, 10), X_1, y_1),
113-
(LinearSVC(penalty='L2', loss='L1', dual=True,
114-
tol=1e-5, intercept_scaling=20),
115-
np.logspace(-3.5, -2.5, 10), X_2, y_2)]
109+
np.logspace(-2.2, -1.2, 10), X_1, y_1),
110+
(LinearSVC(penalty='L2', loss='L2', dual=True,
111+
tol=1e-4),
112+
np.logspace(-4.5, -2, 10), X_2, y_2)]
116113

117114
colors = ['b', 'g', 'r', 'c']
118115

@@ -123,30 +120,32 @@
123120
pl.xlabel('C')
124121
pl.ylabel('CV Score')
125122

126-
for k, train_size in enumerate(np.arange(0.4, 0.7, 0.1)[::-1]):
123+
for k, train_size in enumerate(np.linspace(0.3, 0.7, 3)[::-1]):
127124
param_grid = dict(C=cs)
125+
# To get nice curve, we need a large number of iterations to
126+
# reduce the variance
128127
grid = GridSearchCV(clf, refit=False, param_grid=param_grid,
129128
cv=ShuffleSplit(n=n_samples, train_size=train_size,
130-
n_iterations=45, random_state=1))
129+
n_iterations=250, random_state=1))
131130
grid.fit(X, y)
132131
scores = [x[1] for x in grid.grid_scores_]
133132

134133
scales = [(1, 'No scaling'),
135-
((np.sqrt(n_samples * train_size)), '1/sqrt(n_samples)'),
136134
((n_samples * train_size), '1/n_samples'),
137135
]
138136

139137
for subplotnum, (scaler, name) in enumerate(scales):
140-
pl.subplot(3, 1, subplotnum + 1)
138+
pl.subplot(2, 1, subplotnum + 1)
141139
grid_cs = cs * float(scaler) # scale the C's
142140
pl.semilogx(grid_cs, scores, label="fraction %.2f" %
143-
train_size)
141+
train_size)
144142
pl.title('scaling=%s, penalty=%s, loss=%s' % (name, clf.penalty, clf.loss))
145-
ymin, ymax = pl.ylim()
146-
pl.axvline(grid_cs[np.argmax(scores)], 0, 1,
147-
color=colors[k])
148-
pl.ylim(ymin=ymin-0.0025, ymax=ymax+0.008) # adjust the y-axis
149143

150-
pl.legend(loc="lower right")
144+
#ymin, ymax = pl.ylim()
145+
#pl.axvline(grid_cs[np.argmax(scores)], 0, 1,
146+
# color=colors[k])
147+
#pl.ylim(ymin=ymin-0.0025, ymax=ymax+0.008) # adjust the y-axis
148+
149+
pl.legend(loc="best")
151150
pl.show()
152151

0 commit comments

Comments (0)