
Commit 46ed0ff

add elasticnet penalty to FiniteDiff
1 parent db52eeb commit 46ed0ff

2 files changed: +18 −3 lines changed


pyproject.toml

Lines changed: 1 addition & 1 deletion

@@ -1,6 +1,6 @@
 [project]
 name = "tisthemachinelearner"
-version = "0.5.0"
+version = "0.6.0"
 description = "Lightweight interface to scikit-learn, xgboost, lightgbm, catboost, with 2 classes"
 readme = "README.md"
 requires-python = ">=3.8"

src/tisthemachinelearner/finitedifftrainer.py

Lines changed: 17 additions & 2 deletions

@@ -11,7 +11,7 @@
 # import your matrix operations helper if needed (mo.rbind)

 class FiniteDiffRegressor(BaseModel, RegressorMixin):
-    def __init__(self, base_model, lr=1e-4, optimizer='gd', eps=1e-3, batch_size=32, **kwargs):
+    def __init__(self, base_model, lr=1e-4, optimizer='gd', eps=1e-3, batch_size=32, alpha=0.0, l1_ratio=0.5, **kwargs):
         """
         Finite difference trainer for nnetsauce models.
@@ -20,6 +20,8 @@ def __init__(self, base_model, lr=1e-4, optimizer='gd', eps=1e-3, batch_size=32,
         lr: learning rate.
         optimizer: 'gd' (gradient descent) 'sgd' (stochastic gradient descent) or 'adam' or 'cd' (coordinate descent).
         eps: scaling factor for adaptive finite difference step size.
+        alpha: Elastic net penalty strength.
+        l1_ratio: Elastic net mixing parameter (0 = Ridge, 1 = Lasso).
         **kwargs: Additional parameters to pass to the scikit-learn model.
         """
         super().__init__(base_model, True, **kwargs)
@@ -34,10 +36,17 @@ def __init__(self, base_model, lr=1e-4, optimizer='gd', eps=1e-3, batch_size=32,
         self.batch_size = batch_size # for SGD
         self.loss_history_ = []
         self._cd_index = 0 # For coordinate descent
+        self.alpha = alpha
+        self.l1_ratio = l1_ratio

     def _loss(self, X, y):
         y_pred = self.model.predict(X)
-        return np.sqrt(np.mean((y - y_pred) ** 2))
+        mse = np.mean((y - y_pred) ** 2)
+        W = self.model.W_
+        l1 = np.sum(np.abs(W))
+        l2 = np.sum(W ** 2)
+        penalty = self.alpha * (self.l1_ratio * l1 + 0.5 * (1 - self.l1_ratio) * l2)
+        return np.sqrt(mse) + penalty

     def _compute_grad(self, X, y):
         W = deepcopy(self.model.W_)
@@ -64,6 +73,12 @@ def _compute_grad(self, X, y):
             loss_minus[i] = self._loss(X, y)

         grad = ((loss_plus - loss_minus) / (2 * h_vec)).reshape(shape)
+
+        # Add elastic net gradient
+        l1_grad = self.alpha * self.l1_ratio * np.sign(W)
+        l2_grad = self.alpha * (1 - self.l1_ratio) * W
+        grad += l1_grad + l2_grad
+
         self.model.W_ = W # restore original
         return grad
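For reference, a minimal standalone sketch (not part of the commit) that checks the analytic elastic net gradient added in _compute_grad against a central finite difference of the penalty term added in _loss. The alpha, l1_ratio, and weight matrix below are illustrative stand-ins for self.alpha, self.l1_ratio, and self.model.W_.

import numpy as np

# Illustrative values standing in for self.alpha, self.l1_ratio, self.model.W_
alpha, l1_ratio = 0.1, 0.5
rng = np.random.default_rng(0)
W = rng.normal(size=(5, 3))

def penalty(W):
    # Same penalty as added in _loss
    l1 = np.sum(np.abs(W))
    l2 = np.sum(W ** 2)
    return alpha * (l1_ratio * l1 + 0.5 * (1 - l1_ratio) * l2)

# Analytic gradient, as added in _compute_grad
analytic = alpha * l1_ratio * np.sign(W) + alpha * (1 - l1_ratio) * W

# Central finite-difference gradient of the penalty alone
h = 1e-6
numeric = np.zeros_like(W)
for idx in np.ndindex(*W.shape):
    W_plus, W_minus = W.copy(), W.copy()
    W_plus[idx] += h
    W_minus[idx] -= h
    numeric[idx] = (penalty(W_plus) - penalty(W_minus)) / (2 * h)

print(np.max(np.abs(analytic - numeric)))  # close to zero away from W == 0

The 0.5 factor on the L2 term is what makes its gradient exactly alpha * (1 - l1_ratio) * W, matching the l2_grad line in the diff. With the default alpha=0.0, both the penalty and the added gradient vanish, so the 0.5.0 behaviour is unchanged.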
