Merged
Changes from 1 commit

Add ClassificationKriging
mralbu committed Nov 28, 2020
commit 5dd5df11c2e33f86e770c45396dd9e1ede0120df
4 changes: 2 additions & 2 deletions README.rst
@@ -111,11 +111,11 @@ A demonstration of the regression kriging is provided in the
`corresponding example <http://pykrige.readthedocs.io/en/latest/examples/07_regression_kriging2d.html#sphx-glr-examples-07-regression-kriging2d-py>`_.

Classification Kriging
------------------
----------------------

`Simplicial Indicator kriging <https://www.sciencedirect.com/science/article/abs/pii/S1002070508600254>`_ can be performed
with `pykrige.rk.ClassificationKriging <http://pykrige.readthedocs.io/en/latest/examples/10_classification_kriging2d.html>`_.
This class takes as parameters a scikit-learn classification model, and details of either either
This class takes as parameters a scikit-learn classification model, and details of either
the ``OrdinaryKriging`` or the ``UniversalKriging`` class, and performs a correction step on the ML classification prediction.

A demonstration of the classification kriging is provided in the
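For orientation, here is a minimal usage sketch of the interface described in the README section above. It is not part of this diff: the toy arrays and the RandomForestClassifier choice are made up for illustration, and only the pykrige.ck import path introduced by this commit is assumed.

import numpy as np
from sklearn.ensemble import RandomForestClassifier

from pykrige.ck import ClassificationKriging

# Made-up toy data: 100 samples, 3 predictor features, lon/lat coordinates,
# and 3 class labels (kept 2D so the internal OneHotEncoder accepts them).
rng = np.random.default_rng(0)
p = rng.normal(size=(100, 3))              # predictor variables for the classifier
x = rng.uniform(0.0, 1.0, size=(100, 2))   # spatial coordinates (lon, lat)
y = rng.integers(0, 3, size=(100, 1))      # class labels

ck = ClassificationKriging(
    classification_model=RandomForestClassifier(n_estimators=50, random_state=0),
    method="ordinary",
    variogram_model="linear",
    n_closest_points=10,
)
ck.fit(p, x, y)              # fit the classifier, then krige the ilr residuals
labels = ck.predict(p, x)    # corrected class predictions, shape (100,)
print("CK accuracy:", ck.score(p, x, y))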
11 changes: 6 additions & 5 deletions examples/10_classification_kriging2d.py
@@ -13,7 +13,7 @@
from sklearn.datasets import fetch_california_housing
from sklearn.preprocessing import KBinsDiscretizer

from pykrige.rk import ClassificationKriging
from pykrige.ck import ClassificationKriging
from pykrige.compat import train_test_split

svc_model = SVC(C=0.1, gamma="auto", probability=True)
@@ -32,7 +32,7 @@
p = housing["data"][:5000, :-2]
x = housing["data"][:5000, -2:]
target = housing["target"][:5000]
discretizer = KBinsDiscretizer(encode='ordinal')
discretizer = KBinsDiscretizer(encode="ordinal")
target = discretizer.fit_transform(target.reshape(-1, 1))

p_train, p_test, x_train, x_test, target_train, target_test = train_test_split(
@@ -41,9 +41,10 @@

for m in models:
print("=" * 40)
print("regression model:", m.__class__.__name__)
print("classification model:", m.__class__.__name__)
m_ck = ClassificationKriging(classification_model=m, n_closest_points=10)
m_ck.fit(p_train, x_train, target_train)
print("Classification Score: ",
m_ck.classification_model.score(p_test, target_test))
print(
"Classification Score: ", m_ck.classification_model.score(p_test, target_test)
)
print("CK score: ", m_ck.score(p_test, x_test, target_test))
289 changes: 289 additions & 0 deletions pykrige/ck.py
@@ -0,0 +1,289 @@
# coding: utf-8
from pykrige.compat import Krige, validate_sklearn, check_sklearn_model

validate_sklearn()

from sklearn.metrics import r2_score, accuracy_score
from sklearn.svm import SVC
from sklearn.preprocessing import OneHotEncoder
import numpy as np
from scipy.linalg import helmert


class ClassificationKriging:
    """
    An implementation of Simplicial Indicator Kriging applied to
    ilr-transformed classification residuals.

    Parameters
    ----------
    classification_model: machine learning model instance from sklearn
    method: str, optional
        type of kriging to be performed
    variogram_model: str, optional
        variogram model to be used during Kriging
    n_closest_points: int
        number of closest points to be used during Ordinary Kriging
    nlags: int
        see OK/UK class description
    weight: bool
        see OK/UK class description
    verbose: bool
        see OK/UK class description
    exact_values : bool
        see OK/UK class description
    variogram_parameters : list or dict
        see OK/UK class description
    variogram_function : callable
        see OK/UK class description
    anisotropy_scaling : tuple
        single value for 2D (UK/OK) and two values in 3D (UK3D/OK3D)
    anisotropy_angle : tuple
        single value for 2D (UK/OK) and three values in 3D (UK3D/OK3D)
    enable_statistics : bool
        see OK class description
    coordinates_type : str
        see OK/UK class description
    drift_terms : list of strings
        see UK/UK3D class description
    point_drift : array_like
        see UK class description
    ext_drift_grid : tuple
        Holding the three values external_drift, external_drift_x and
        external_drift_z for the UK class
    functional_drift : list of callable
        see UK/UK3D class description
    """

    def __init__(
        self,
        classification_model=SVC(),
        method="ordinary",
        variogram_model="linear",
        n_closest_points=10,
        nlags=6,
        weight=False,
        verbose=False,
        exact_values=True,
        pseudo_inv=False,
        pseudo_inv_type="pinv",
        variogram_parameters=None,
        variogram_function=None,
        anisotropy_scaling=(1.0, 1.0),
        anisotropy_angle=(0.0, 0.0, 0.0),
        enable_statistics=False,
        coordinates_type="euclidean",
        drift_terms=None,
        point_drift=None,
        ext_drift_grid=(None, None, None),
        functional_drift=None,
    ):
        check_sklearn_model(classification_model, task="classification")
        self.classification_model = classification_model
        self.n_closest_points = n_closest_points
        self._kriging_kwargs = dict(
            method=method,
            variogram_model=variogram_model,
            nlags=nlags,
            weight=weight,
            n_closest_points=n_closest_points,
            verbose=verbose,
            exact_values=exact_values,
            pseudo_inv=pseudo_inv,
            pseudo_inv_type=pseudo_inv_type,
            variogram_parameters=variogram_parameters,
            variogram_function=variogram_function,
            anisotropy_scaling=anisotropy_scaling,
            anisotropy_angle=anisotropy_angle,
            enable_statistics=enable_statistics,
            coordinates_type=coordinates_type,
            drift_terms=drift_terms,
            point_drift=point_drift,
            ext_drift_grid=ext_drift_grid,
            functional_drift=functional_drift,
        )

    def fit(self, p, x, y):
        """
        Fit the classification method and also krige the residual.

        Parameters
        ----------
        p: ndarray
            (Ns, d) array of predictor variables (Ns samples, d dimensions)
            for classification
        x: ndarray
            ndarray of (x, y) points. Needs to be a (Ns, 2) array
            corresponding to the lon/lat, for example, for 2d classification kriging.
            array of Points, (x, y, z) pairs of shape (N, 3) for 3d kriging
        y: ndarray
            array of targets (Ns, )
        """
        self.classification_model.fit(p, y.ravel())
        print("Finished learning classification model")
        self.classes_ = self.classification_model.classes_

        self.krige = []
        for i in range(len(self.classes_) - 1):
            self.krige.append(Krige(**self._kriging_kwargs))

        ml_pred = self.classification_model.predict_proba(p)
        ml_pred_ilr = ilr_transformation(ml_pred)

        self.onehotencode = OneHotEncoder(categories=[self.classes_])
        y_ohe = np.array(self.onehotencode.fit_transform(y).todense())
        y_ohe_ilr = ilr_transformation(y_ohe)

        for i in range(len(self.classes_) - 1):
            self.krige[i].fit(x=x, y=y_ohe_ilr[:, i] - ml_pred_ilr[:, i])

        print("Finished kriging residuals")

    def predict(self, p, x, **kwargs):
        """
        Predict.

        Parameters
        ----------
        p: ndarray
            (Ns, d) array of predictor variables (Ns samples, d dimensions)
            for classification
        x: ndarray
            ndarray of (x, y) points. Needs to be a (Ns, 2) array
            corresponding to the lon/lat, for example.
            array of Points, (x, y, z) pairs of shape (N, 3) for 3d kriging

        Returns
        -------
        pred: ndarray
            The predicted class for the query inputs, of shape (Ns,).

        """

        ml_pred = self.classification_model.predict_proba(p)
        ml_pred_ilr = ilr_transformation(ml_pred)

        pred_proba_ilr = self.krige_residual(x, **kwargs) + ml_pred_ilr
        pred_proba = inverse_ilr_transformation(pred_proba_ilr)

        return np.argmax(pred_proba, axis=1)
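
    # Note: the residual correction happens in ilr space. krige_residual()
    # below returns one kriged residual per ilr coordinate; predict() adds it
    # to the ilr-transformed class probabilities, maps the sum back to the
    # simplex, and returns the most probable class.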

    def krige_residual(self, x, **kwargs):
        """
        Calculate the residuals.

        Parameters
        ----------
        x: ndarray
            ndarray of (x, y) points. Needs to be a (Ns, 2) array
            corresponding to the lon/lat, for example.

        Returns
        -------
        residual: ndarray
            kriged residual values
        """

        krig_pred = [
            self.krige[i].predict(x=x, **kwargs) for i in range(len(self.classes_) - 1)
        ]

        return np.vstack(krig_pred).T

    def score(self, p, x, y, sample_weight=None, **kwargs):
        """
        Overloading default classification score method.

        Parameters
        ----------
        p: ndarray
            (Ns, d) array of predictor variables (Ns samples, d dimensions)
            for classification
        x: ndarray
            ndarray of (x, y) points. Needs to be a (Ns, 2) array
            corresponding to the lon/lat, for example.
            array of Points, (x, y, z) pairs of shape (N, 3) for 3d kriging
        y: ndarray
            array of targets (Ns, )
        """
        return accuracy_score(
            y_pred=self.predict(p, x, **kwargs), y_true=y, sample_weight=sample_weight
        )


def closure(data, k=1.0):
    """Apply closure to data, sample-wise.
    Adapted from https://github.com/ofgulban/compoda.

    Parameters
    ----------
    data : 2d numpy array, shape [n_samples, n_measurements]
        Data to be closed to a certain constant. Do not forget to deal with
        zeros in the data before this operation.
    k : float, positive
        Sum of the measurements will be equal to this number.

    Returns
    -------
    data : 2d numpy array, shape [n_samples, n_measurements]
        Closed data.

    Reference
    ---------
    [1] Pawlowsky-Glahn, V., Egozcue, J. J., & Tolosana-Delgado, R.
        (2015). Modelling and Analysis of Compositional Data, pg. 9.
        Chichester, UK: John Wiley & Sons, Ltd.
        DOI: 10.1002/9781119003144
    """

    return k * data / np.sum(data, axis=1)[:, np.newaxis]


def ilr_transformation(data):
    """Isometric logratio transformation (not vectorized).
    Adapted from https://github.com/ofgulban/compoda.

    Parameters
    ----------
    data : 2d numpy array, shape [n_samples, n_coordinates]
        Barycentric coordinates (closed) in simplex space.

    Returns
    -------
    out : 2d numpy array, shape [n_samples, n_coordinates-1]
        Coordinates in real space.

    Reference
    ---------
    [1] Pawlowsky-Glahn, V., Egozcue, J. J., & Tolosana-Delgado, R.
        (2015). Modelling and Analysis of Compositional Data, pg. 37.
        Chichester, UK: John Wiley & Sons, Ltd.
        DOI: 10.1002/9781119003144
    """
    data = np.maximum(data, np.finfo(float).eps)

    return np.einsum("ij,jk->ik", np.log(data), -helmert(data.shape[1]).T)


def inverse_ilr_transformation(data):
    """Inverse isometric logratio transformation (not vectorized).
    Adapted from https://github.com/ofgulban/compoda.

    Parameters
    ----------
    data : 2d numpy array, shape [n_samples, n_coordinates]
        Isometric log-ratio transformed coordinates in real space.

    Returns
    -------
    out : 2d numpy array, shape [n_samples, n_coordinates+1]
        Barycentric coordinates (closed) in simplex space.

    Reference
    ---------
    [1] Pawlowsky-Glahn, V., Egozcue, J. J., & Tolosana-Delgado, R.
        (2015). Modelling and Analysis of Compositional Data, pg. 37.
        Chichester, UK: John Wiley & Sons, Ltd.
        DOI: 10.1002/9781119003144
    """

    return closure(np.exp(np.einsum("ij,jk->ik", data, -helmert(data.shape[1] + 1))))
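
As a quick sanity check of the compositional helpers above (a sketch, not part of the diff; the probability values are made up), the ilr transformation and its inverse should round-trip closed compositions:

import numpy as np

from pykrige.ck import closure, ilr_transformation, inverse_ilr_transformation

# Made-up class probabilities for 3 samples over 3 classes (each row sums to 1).
proba = np.array([[0.2, 0.3, 0.5],
                  [0.7, 0.2, 0.1],
                  [0.1, 0.1, 0.8]])

z = ilr_transformation(proba)          # real-space coordinates, shape (3, 2)
back = inverse_ilr_transformation(z)   # back on the simplex, shape (3, 3)

assert z.shape == (3, 2)
assert np.allclose(back, closure(proba))   # round-trip recovers the closed input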