# import your matrix operations helper if needed (mo.rbind)

 class FiniteDiffRegressor(BaseModel, RegressorMixin):

-    def __init__(self, base_model, lr=1e-4, optimizer='gd', eps=1e-3, batch_size=32, **kwargs):
+    def __init__(self, base_model, lr=1e-4, optimizer='gd', eps=1e-3, batch_size=32, alpha=0.0, l1_ratio=0.5, **kwargs):
         """
         Finite difference trainer for nnetsauce models.
@@ -20,6 +20,8 @@ def __init__(self, base_model, lr=1e-4, optimizer='gd', eps=1e-3, batch_size=32,
         lr: learning rate.
         optimizer: 'gd' (gradient descent), 'sgd' (stochastic gradient descent), 'adam', or 'cd' (coordinate descent).
         eps: scaling factor for adaptive finite difference step size.
+        alpha: elastic net penalty strength.
+        l1_ratio: elastic net mixing parameter (0 = Ridge, 1 = Lasso).
         **kwargs: Additional parameters to pass to the scikit-learn model.
         """
         super().__init__(base_model, True, **kwargs)
@@ -34,10 +36,17 @@ def __init__(self, base_model, lr=1e-4, optimizer='gd', eps=1e-3, batch_size=32,
         self.batch_size = batch_size  # for SGD
         self.loss_history_ = []
         self._cd_index = 0  # for coordinate descent
+        self.alpha = alpha
+        self.l1_ratio = l1_ratio

     def _loss(self, X, y):
         y_pred = self.model.predict(X)
-        return np.sqrt(np.mean((y - y_pred) ** 2))
+        mse = np.mean((y - y_pred) ** 2)
+        W = self.model.W_
+        l1 = np.sum(np.abs(W))
+        l2 = np.sum(W ** 2)
+        penalty = self.alpha * (self.l1_ratio * l1 + 0.5 * (1 - self.l1_ratio) * l2)
+        return np.sqrt(mse) + penalty

     def _compute_grad(self, X, y):
         W = deepcopy(self.model.W_)
@@ -64,6 +73,12 @@ def _compute_grad(self, X, y):
             loss_minus[i] = self._loss(X, y)

         grad = ((loss_plus - loss_minus) / (2 * h_vec)).reshape(shape)
+
+        # Add elastic net gradient (np.sign gives the L1 subgradient)
+        l1_grad = self.alpha * self.l1_ratio * np.sign(W)
+        l2_grad = self.alpha * (1 - self.l1_ratio) * W
+        grad += l1_grad + l2_grad
+
         self.model.W_ = W  # restore original
         return grad
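The penalized objective above is the RMSE plus an elastic net term, `alpha * (l1_ratio * ||W||_1 + 0.5 * (1 - l1_ratio) * ||W||_2^2)`, and its (sub)gradient is added analytically on top of the central finite-difference gradient of the RMSE. A minimal usage sketch of the new `alpha` / `l1_ratio` parameters follows; it assumes `FiniteDiffRegressor` is in scope as defined above and that the class exposes a `fit` method (not shown in this change), and the data and values used are placeholders.

```python
import numpy as np
from sklearn.linear_model import LinearRegression

# Placeholder data for illustration only
rng = np.random.default_rng(0)
X_train = rng.normal(size=(100, 5))
y_train = X_train @ rng.normal(size=5) + 0.1 * rng.normal(size=100)

reg = FiniteDiffRegressor(
    base_model=LinearRegression(),
    lr=1e-4,
    optimizer='gd',
    eps=1e-3,
    alpha=0.1,      # elastic net penalty strength (new parameter)
    l1_ratio=0.5,   # 0 = pure ridge, 1 = pure lasso (new parameter)
)
reg.fit(X_train, y_train)        # assumed: fit() runs the finite-difference update loop
print(reg.loss_history_[-1])     # penalized RMSE recorded during training
```

With `alpha=0.0` (the default) the penalty vanishes and the loss reduces to the plain RMSE used before.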