Skip to content

Commit 3adad9f

Browse files
committed
Documentation + ESS-based resampling
1 parent 37f23b5 commit 3adad9f

File tree

5 files changed

+448
-243
lines changed

5 files changed

+448
-243
lines changed

cd_dynamax/src/continuous_discrete_nonlinear_ssm/cdnlssm_utils.py

Lines changed: 7 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,12 @@
66
from cd_dynamax.dynamax.parameters import ParameterProperties, ParameterSet
77
import tensorflow_probability.substrates.jax.distributions as tfd
88

9-
109
from ..continuous_discrete_nonlinear_gaussian_ssm.cdnlgssm_utils import (
1110
_get_params,
1211
LearnableFunction,
1312
LearnableVector,
1413
LearnableMatrix,
14+
ParamsCDNLGSSMDynamics,
1515
)
1616

1717

@@ -67,12 +67,8 @@ def sample(self, x, u=None, t=None):
6767
return self.transform.f(base_sample, u, t)
6868

6969

70-
# Dynamics container for CD-NLSSM
71-
class ParamsCDNLSSMDynamics(NamedTuple):
72-
drift: LearnableFunction
73-
diffusion_coefficient: LearnableFunction
74-
diffusion_cov: LearnableFunction
75-
approx_order: Union[float, ParameterProperties]
70+
# Currently, we only support Brownian motion-driven SDEs, so we can reuse the CDNLGSSM dynamics parameters
71+
ParamsCDNLSSMDynamics = ParamsCDNLGSSMDynamics
7672

7773

7874
## CDNLSSM parameter class definitions
@@ -88,17 +84,16 @@ class ParamsCDNLSSMEmissions(NamedTuple):
8884

8985
# CDNLGSSM parameters are different to CDLGSSM due to nonlinearities
9086
class ParamsCDNLSSM(NamedTuple):
91-
r"""Parameters of a nonlinear Gaussian SSM.
87+
r"""Parameters of a continuous-discrete nonlinear SSM.
9288
9389
:param initial: initial distribution parameters
9490
:param dynamics: dynamics distribution parameters
9591
:param emissions: emission distribution parameters
9692
9793
The assumed transition and emission distributions are
98-
$$p(z_1) = N(z_1 | m, S)$$
99-
$$p(z_t | z_{t-1}, u_t) = N(z_t | m_t, P_t)$$
100-
$$p(y_t | z_t) = N(y_t | h(z_t, u_t), R_t)$$
101-
94+
$$p(z_0) = p_{\mathrm{initial}}(z_0)$$
95+
$$p(z_{t_k} \mid z_{t_{k-1}}, u_{t_k}) = \mathrm{solve\_sde}(z_{t_{k-1}}, u_{t_k}, t_{k-1}, t_k, f_{\mathrm{dynamics}}, L_{\mathrm{dynamics}}, Q_{\mathrm{dynamics}})$$
96+
$$p(y_{t_k} \mid z_{t_k}) = p_{\mathrm{emissions}}(y_{t_k} \mid z_{t_k})$$
10297
"""
10398

10499
initial: ParamsCDNLSSMInitial

cd_dynamax/src/continuous_discrete_nonlinear_ssm/inference_dpf.py

Lines changed: 140 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from typing import NamedTuple
1+
from typing import NamedTuple, Union, Tuple
22

33
from jaxtyping import Array, Float
44
from .cdnlssm_utils import ParamsCDNLSSM
@@ -14,12 +14,17 @@ class DPFHyperParams(NamedTuple):
1414

1515
dt_final: float = 1e-4
1616
N_particles: int = 100
17+
ess_threshold_ratio: float = 0.5
1718
resample_method: str = "stop_gradient"
1819
softness: float = 0.7
1920
cov_rescaling: float = 1.0
2021
state_order: str = "first"
2122
dt_average: float = 0.1
2223
diffeqsolve_settings: dict = {}
24+
return_ess_history: bool = False
25+
proposal_method: str = (
26+
"bootstrap" # Currently, only bootstrap proposals are supported.
27+
)
2328

2429

2530
def _predict(
@@ -31,7 +36,20 @@ def _predict(
3136
u,
3237
filter_hyperparams,
3338
):
34-
"""Predict evolution of ensemble of particles through the nonlinear stochastic dynamics."""
39+
"""Predict evolution of ensemble of particles through the nonlinear stochastic dynamics.
40+
41+
Args:
42+
key: Random key.
43+
x: Particles to predict.
44+
params: Parameters of the CDNLSSM.
45+
t0: Initial time.
46+
t1: Final time.
47+
u: Inputs.
48+
filter_hyperparams: Hyperparameters of the filter.
49+
50+
Returns:
51+
x_pred: Predicted particles.
52+
"""
3553

3654
def drift(t, y, args):
3755
return params.dynamics.drift.f(y, u, t)
@@ -95,11 +113,17 @@ def _normalize_log_weights(log_w):
95113
return log_w - log_norm, log_norm
96114

97115

116+
def _effective_sample_size(log_w):
117+
return jnp.exp(-logsumexp(2.0 * log_w))
118+
119+
98120
_SUPPORTED_RESAMPLERS = ("multinomial", "soft", "stop_gradient")
99121

100122

101123
def _validate_resample_method(method: str) -> str:
102-
"""Ensure the requested resampling strategy is implemented."""
124+
"""Ensure the requested resampling strategy is implemented.
125+
126+
See :attr:`_SUPPORTED_RESAMPLERS` for supported methods."""
103127
method_lower = method.lower()
104128
if method_lower not in _SUPPORTED_RESAMPLERS:
105129
raise ValueError(
@@ -110,6 +134,21 @@ def _validate_resample_method(method: str) -> str:
110134

111135

112136
def _multinomial_resample(key, x, log_w):
137+
"""Multinomial resampling.
138+
139+
This is the classical resampling strategy for particle filters,
140+
where each particle is resampled with probability proportional to its weight.
141+
142+
Args:
143+
key: Random key.
144+
x: Particles to resample.
145+
log_w: Log weights of the particles.
146+
147+
Returns:
148+
x_resampled: Resampled particles.
149+
log_w_resampled: Log weights of the resampled particles.
150+
idx: Indices of the resampled particles.
151+
"""
113152
probs = jnp.exp(log_w - logsumexp(log_w))
114153
idx = jr.choice(key, x.shape[0], shape=(x.shape[0],), p=probs, replace=True)
115154
x_resampled = x[idx]
@@ -118,7 +157,25 @@ def _multinomial_resample(key, x, log_w):
118157

119158

120159
def _stop_gradient_resample(key, x, log_w, *, base_method: str = "multinomial"):
121-
"""Resample while passing gradients through the chosen particles."""
160+
"""Stop-gradient resampling [1].
161+
162+
Stop-gradient resampling [1] modifies the resampling step such that forward passes are not modified.
163+
164+
References:
165+
[1] Scibior A, Wood F (2021). “Differentiable particle filtering without modifying the forward pass.” arXiv:2106.10314
166+
167+
Args:
168+
key: Random key.
169+
x: Particles to resample.
170+
log_w: Log weights of the particles.
171+
base_method: Base resampling method to use.
172+
173+
Returns:
174+
x_resampled: Resampled particles.
175+
log_w_resampled: Log weights of the resampled particles.
176+
idx: Indices of the resampled particles.
177+
178+
"""
122179
if base_method != "multinomial":
123180
raise ValueError(
124181
f"Unsupported base resampler '{base_method}' for stop_gradient resampling."
@@ -131,6 +188,27 @@ def _stop_gradient_resample(key, x, log_w, *, base_method: str = "multinomial"):
131188

132189

133190
def _soft_resample(key, x, log_w, softness):
191+
"""Soft resampling [1].
192+
193+
Soft resampling approximates differentiable resampling by mixing the weights w with a uniform distribution,
194+
q(k) = softness * w[k] + (1-softness) / n_particles,
195+
then reweighting via importance weights.
196+
197+
This strategy generally provides biased gradients, but can still be an efficient approximation.
198+
199+
References:
200+
[1] Karkus P, Hsu D, Lee WS (2018). “Particle filter networks with application to visual localization.” In Proc. Conf. Robot Learn., pp. 169–178. PMLR, Zurich, CH.
201+
202+
Args:
203+
key: Random key.
204+
x: Particles to resample.
205+
log_w: Log weights of the particles.
206+
softness: Softness parameter.
207+
208+
Returns:
209+
x_resampled: Resampled particles.
210+
log_w_resampled: Log weights of the resampled particles.
211+
"""
134212
n = x.shape[0]
135213
log_n = jnp.log(n)
136214
log_softness = jnp.log(softness)
@@ -151,11 +229,44 @@ def filter_dpf(
151229
us: Array | None = None,
152230
ts: Array | None = None,
153231
hyperparams: DPFHyperParams = DPFHyperParams(),
154-
):
155-
"""Differentiable particle filter with configurable resampling (default stop-gradient)."""
232+
) -> Union[Tuple[Array, Array, Array, float], Tuple[Array, Array, Array, Array, float]]:
233+
"""Differentiable particle filter with configurable resampling.
234+
235+
A differentiable particle filter (DPF) is a particle filter in which the (discrete, non-differentiable) resampling step is replaced by a gradient-compatible surrogate, enabling gradient-based optimization of model parameters.
236+
This implementation supports three different resampling methods:
237+
- Multinomial resampling (biased)
238+
- Soft resampling [1] (biased; interpolates between multinomial and uniform resampling)
239+
- Stop-gradient resampling [2] (unbiased for score estimates)
240+
241+
Currently, only bootstrap proposals are supported.
242+
243+
References:
244+
[1] Karkus P, Hsu D, Lee WS (2018). “Particle filter networks with application to visual localization.” In Proc. Conf. Robot Learn., pp. 169–178. PMLR, Zurich, CH.
245+
[2] Scibior A, Wood F (2021). “Differentiable particle filtering without modifying the forward pass.” arXiv:2106.10314
246+
247+
Args:
248+
key: Random key.
249+
params: Parameters of the CDNLSSM.
250+
ys: Emissions.
251+
us: Inputs.
252+
ts: Times.
253+
hyperparams: Hyperparameters of the filter.
254+
255+
Returns:
256+
particles: Particles.
257+
log_weights: Log weights.
258+
ess_history: (if return_ess_history is True) Effective sample size history.
259+
log_evidence: Log evidence.
260+
"""
156261
n_particles = int(hyperparams.N_particles)
157262
T = ys.shape[0]
158263
resample_method = _validate_resample_method(hyperparams.resample_method)
264+
ess_threshold = hyperparams.ess_threshold_ratio * n_particles
265+
266+
if hyperparams.proposal_method != "bootstrap":
267+
raise ValueError(
268+
f"Currently, only bootstrap proposals are supported, but {hyperparams.proposal_method} was provided."
269+
)
159270

160271
key_init, key = jr.split(key)
161272
particles = params.initial.initial_distribution.distribution.sample(
@@ -200,28 +311,37 @@ def _do_predict(p):
200311
)
201312
log_w, log_norm = _normalize_log_weights(log_w)
202313
log_evidence = log_evidence + log_norm
314+
ess = _effective_sample_size(log_w)
203315

204316
particles_hist = particles
205317
logw_hist = log_w
206318

207-
# TODO: Only resample under criteria, e.g., ESS < threshold.
208-
if resample_method == "soft":
209-
particles, log_w = _soft_resample(
210-
key_resample, particles, log_w, hyperparams.softness
211-
)
212-
elif resample_method == "multinomial":
213-
particles, log_w, _ = _multinomial_resample(key_resample, particles, log_w)
214-
else: # stop_gradient
215-
particles, log_w = _stop_gradient_resample(
216-
key_resample, particles, log_w, base_method="multinomial"
319+
def _resample(args):
320+
x_in, log_w_in, key_in = args
321+
if resample_method == "soft":
322+
return _soft_resample(key_in, x_in, log_w_in, hyperparams.softness)
323+
if resample_method == "multinomial":
324+
x_out, log_w_out, _ = _multinomial_resample(key_in, x_in, log_w_in)
325+
return x_out, log_w_out
326+
return _stop_gradient_resample(
327+
key_in, x_in, log_w_in, base_method="multinomial"
217328
)
218329

219-
return (particles, log_w, log_evidence), (particles_hist, logw_hist)
330+
particles, log_w = lax.cond(
331+
ess < ess_threshold,
332+
_resample,
333+
lambda args: (args[0], args[1]),
334+
(particles, log_w, key_resample),
335+
)
220336

221-
(particles, log_w, log_evidence), (particles_hist, logw_hist) = lax.scan(
337+
return (particles, log_w, log_evidence), (particles_hist, logw_hist, ess)
338+
339+
(particles, log_w, log_evidence), (particles_hist, logw_hist, ess_hist) = lax.scan(
222340
_step,
223341
(particles, log_w, log_evidence),
224342
(keys, idxs, t_currs, t_prevs, u_prevs, u_currs),
225343
)
226-
227-
return particles_hist, logw_hist, log_evidence
344+
if hyperparams.return_ess_history:
345+
return particles_hist, logw_hist, ess_hist, log_evidence
346+
else:
347+
return particles_hist, logw_hist, log_evidence

0 commit comments

Comments
 (0)