 # import your matrix operations helper if needed (mo.rbind)
 
 class FiniteDiffRegressor(BaseModel, RegressorMixin):
16+ """
17+ Finite difference trainer for nnetsauce models.
18+
19+ Parameters
20+ ----------
21+ base_model : str
22+ The name of the base model (e.g., 'RidgeCV').
23+ lr : float, optional
24+ Learning rate for optimization (default=1e-4).
25+ optimizer : {'gd', 'sgd', 'adam', 'cd'}, optional
26+ Optimization algorithm: gradient descent ('gd'), stochastic gradient descent ('sgd'),
27+ Adam ('adam'), or coordinate descent ('cd'). Default is 'gd'.
28+ eps : float, optional
29+ Scaling factor for adaptive finite difference step size (default=1e-3).
30+ batch_size : int, optional
31+ Batch size for 'sgd' optimizer (default=32).
32+ alpha : float, optional
33+ Elastic net penalty strength (default=0.0).
34+ l1_ratio : float, optional
35+ Elastic net mixing parameter (0 = Ridge, 1 = Lasso, default=0.0).
36+ type_loss : {'mse', 'quantile'}, optional
37+ Type of loss function to use (default='mse').
38+ q : float, optional
39+ Quantile for quantile loss (default=0.5).
40+ **kwargs
41+ Additional parameters to pass to the scikit-learn model.
42+ """
43+
     def __init__(self, base_model,
                  lr=1e-4, optimizer='gd',
                  eps=1e-3, batch_size=32,
                  alpha=0.0, l1_ratio=0.0,
                  type_loss="mse", q=0.5,
                  **kwargs):
22- """
23- Finite difference trainer for nnetsauce models.
24-
25- Args:
26-
27- base_model: a string, the name of the model.
28-
29- lr: learning rate.
30-
31- optimizer: 'gd' (gradient descent) 'sgd' (stochastic gradient descent) or 'adam' or 'cd' (coordinate descent).
32-
33- eps: scaling factor for adaptive finite difference step size.
34-
35- batch_size: integer, size of batch for 'sgd'
36-
37- alpha: Elastic net penalty strength.
38-
39- l1_ratio: Elastic net mixing parameter (0 = Ridge, 1 = Lasso).
40-
41- type_loss: Type of loss functions (currently "mse" or "quantile")
42-
43- q: quantile for `type_loss = 'quantile'`
44-
45- **kwargs: Additional parameters to pass to the scikit-learn model.
46- """
         super().__init__(base_model, True, **kwargs)
         self.model = ns.CustomRegressor(self.model, **self.custom_kwargs)
         assert isinstance(self.model, ns.CustomRegressor),\
@@ -61,7 +64,24 @@ def __init__(self, base_model,
         self.type_loss = type_loss
         self.q = q
 
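A minimal construction sketch for orientation (illustrative, not part of this commit; it assumes the class is importable and that scikit-learn's RidgeCV is available as the base model):

    # Hypothetical usage, with assumed hyperparameter values.
    reg = FiniteDiffRegressor(
        base_model="RidgeCV",  # name of the scikit-learn base model
        lr=1e-4,               # learning rate for the weight updates
        optimizer="gd",        # plain gradient descent
        alpha=0.1,             # elastic net penalty strength (assumed value)
        l1_ratio=0.5,          # halfway between Ridge (0) and Lasso (1)
    )
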
-    def _loss(self, X, y, **kwargs):
+    def _loss(self, X, y, **kwargs):
68+ """
69+ Compute the loss (with elastic net penalty) for the current model.
70+
71+ Parameters
72+ ----------
73+ X : array-like of shape (n_samples, n_features)
74+ Input data.
75+ y : array-like of shape (n_samples,)
76+ Target values.
77+ **kwargs
78+ Additional keyword arguments for loss calculation.
79+
80+ Returns
81+ -------
82+ float
83+ The computed loss value.
84+ """
         y_pred = self.model.predict(X)
         if self.type_loss == "mse":
             loss = np.mean((y - y_pred) ** 2)
@@ -73,6 +93,21 @@ def _loss(self, X, y, **kwargs):
         return loss + self.alpha * (self.l1_ratio * l1 + 0.5 * (1 - self.l1_ratio) * l2)
 
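The return statement above adds an elastic net penalty to the data loss. A self-contained sketch of both pieces, assuming l1 and l2 are the L1 norm and squared L2 norm of the hidden-layer weights W_ (their computation, like the 'quantile' branch, is elided from this hunk):

    import numpy as np

    # Hedged sketch of the penalty in the return statement above, assuming
    # l1 = sum(|W|) and l2 = sum(W**2); l1_ratio=0 is pure Ridge, 1 pure Lasso.
    def elastic_net_penalty(W, alpha, l1_ratio):
        l1 = np.sum(np.abs(W))
        l2 = np.sum(W ** 2)
        return alpha * (l1_ratio * l1 + 0.5 * (1 - l1_ratio) * l2)

    # Standard pinball loss; the elided 'quantile' branch presumably does
    # something equivalent for type_loss="quantile".
    def quantile_loss(y, y_pred, q=0.5):
        e = y - y_pred
        return np.mean(np.maximum(q * e, (q - 1.0) * e))
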
     def _compute_grad(self, X, y):
96+ """
97+ Compute the gradient of the loss with respect to W_ using finite differences.
98+
99+ Parameters
100+ ----------
101+ X : array-like of shape (n_samples, n_features)
102+ Input data.
103+ y : array-like of shape (n_samples,)
104+ Target values.
105+
106+ Returns
107+ -------
108+ ndarray
109+ Gradient array with the same shape as W_.
110+ """
         W = deepcopy(self.model.W_)
         shape = W.shape
         W_flat = W.flatten()
@@ -108,24 +143,29 @@ def _compute_grad(self, X, y):
 
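The rest of _compute_grad is elided from this hunk. A sketch of the general pattern, a forward finite difference over the flattened weights with a magnitude-scaled step (the exact step rule in the real code is an assumption consistent with the eps docstring):

    import numpy as np

    # Illustrative forward finite-difference gradient, not the exact elided
    # implementation: perturb one flattened weight at a time and measure
    # the change in loss.
    def fd_gradient(loss_fn, W, eps=1e-3):
        W_flat = W.flatten()
        grad = np.zeros_like(W_flat)
        base_loss = loss_fn(W_flat.reshape(W.shape))
        for i in range(W_flat.size):
            h = eps * max(1.0, abs(W_flat[i]))  # adaptive step (assumed rule)
            W_pert = W_flat.copy()
            W_pert[i] += h
            grad[i] = (loss_fn(W_pert.reshape(W.shape)) - base_loss) / h
        return grad.reshape(W.shape)
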
     def fit(self, X, y, epochs=10, verbose=True, show_progress=True, sample_weight=None, **kwargs):
110145 """
-        Optimizes W_ using finite differences and retrains readout.
-
-        Args:
-
-            X, y: data to compute loss and retrain output
-
-            epochs: number of optimization steps
-
-            verbose: whether to print progress messages
-
-            show_progress: whether to show tqdm progress bar
-
-            sample_weight: weight for observations
-
-        Returns:
-
-            self (enables method chaining)
-
+        Fit the model using finite difference optimization.
+
+        Parameters
+        ----------
+        X : array-like of shape (n_samples, n_features)
+            Training data.
+        y : array-like of shape (n_samples,)
+            Target values.
+        epochs : int, optional
+            Number of optimization steps (default=10).
+        verbose : bool, optional
+            Whether to print progress messages (default=True).
+        show_progress : bool, optional
+            Whether to show tqdm progress bar (default=True).
+        sample_weight : array-like, optional
+            Sample weights.
+        **kwargs
+            Additional keyword arguments.
+
+        Returns
+        -------
+        self : object
+            Returns self.
129169 """
130170
131171 self .model .fit (X , y )
@@ -210,40 +250,26 @@ def fit(self, X, y, epochs=10, verbose=True, show_progress=True, sample_weight=N
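Continuing the construction sketch from above, a fit call might look like this (toy data, illustrative only):

    import numpy as np

    rng = np.random.default_rng(0)
    X = rng.normal(size=(100, 5))  # toy regression inputs
    y = X @ rng.normal(size=5) + 0.1 * rng.normal(size=100)

    # 'reg' is the FiniteDiffRegressor built in the earlier sketch.
    reg.fit(X, y, epochs=10, verbose=False, show_progress=False)
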
     def predict(self, X, level=95, method='splitconformal', **kwargs):
213- """Predict test data X.
214-
215- Parameters:
216-
217- X: {array-like}, shape = [n_samples, n_features]
218- Training vectors, where n_samples is the number
219- of samples and n_features is the number of features.
220-
221- level: int
222- Level of confidence (default = 95)
223-
224- method: str
225- 'splitconformal', 'localconformal'
226- prediction (if you specify `return_pi = True`)
227-
228- **kwargs: additional parameters
229- `return_pi = True` for conformal prediction,
230- with `method` in ('splitconformal', 'localconformal')
231- or `return_std = True` for `self.model` in
232- (`sklearn.linear_model.BayesianRidge`,
233- `sklearn.linear_model.ARDRegressor`,
234- `sklearn.gaussian_process.GaussianProcessRegressor`)`
235-
236- Returns:
237-
238- model predictions:
239- an array if uncertainty quantification is not requested,
240- or a tuple if with prediction intervals and simulations
241- if `return_std = True` (mean, standard deviation,
242- lower and upper prediction interval) or `return_pi = True`
243- ()
244-
245253 """
246-
+        Predict using the trained model.
+
+        Parameters
+        ----------
+        X : array-like of shape (n_samples, n_features)
+            Input data.
+        level : int, optional
+            Level of confidence for prediction intervals (default=95).
+        method : {'splitconformal', 'localconformal'}, optional
+            Method for conformal prediction (default='splitconformal').
+        **kwargs
+            Additional keyword arguments. Use `return_pi=True` for prediction
+            intervals, or `return_std=True` for standard deviation estimates.
+
+        Returns
+        -------
+        array or tuple
+            Model predictions, or a tuple with prediction intervals or
+            standard deviations if requested.
+        """
247273 if "return_std" in kwargs :
248274
249275 alpha = 100 - level
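A hedged prediction sketch; per the docstring, return_pi=True requests conformal intervals, but the exact layout of the returned tuple follows the nnetsauce convention and is not shown in this diff:

    # Point predictions only.
    y_pred = reg.predict(X)

    # 95% split conformal intervals; the returned tuple is assumed to carry
    # the point predictions plus lower and upper bounds.
    pi = reg.predict(X, level=95, method="splitconformal", return_pi=True)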