Merged

24 commits
daa1d3d
Add ClassificationKriging
mralbu Aug 30, 2020
b958501
Merge branch 'master' of https://github.com/GeoStat-Framework/PyKrige…
mralbu Sep 12, 2020
5dd5df1
Add ClassificationKriging
mralbu Sep 12, 2020
ec671d0
CI: move to github actions (#175)
MuellerSeb Dec 3, 2020
cbc52b3
Fix sample error in 02_kriging3D.py
Rhilip Mar 31, 2021
f909f41
Merge pull request #165 from mralbu/classification-kriging
MuellerSeb Apr 1, 2021
5e37940
Merge pull request #182 from Rhilip/patch-1
MuellerSeb Apr 1, 2021
a0c212c
CI: coveralls 3.0 fix
MuellerSeb Apr 3, 2021
f7a0d72
Benchmarks: blackened
MuellerSeb Apr 3, 2021
549893e
OK: fix long long bug for windows
MuellerSeb Apr 3, 2021
d58de24
CI: use new cibw actions; restructure
MuellerSeb Apr 3, 2021
28f297d
CI: fix scipy version bug
MuellerSeb Apr 3, 2021
b3e5168
OK: 2. try long fix
MuellerSeb Apr 3, 2021
4a3645f
Drop Py35 support
MuellerSeb Apr 3, 2021
4d41742
CI: fix pypi upload action version
MuellerSeb Apr 3, 2021
78f0cfb
Merge pull request #183 from GeoStat-Framework/CI-fix
MuellerSeb Apr 3, 2021
6a86094
Merge branch 'develop' of github.com:GeoStat-Framework/PyKrige into c…
MuellerSeb Apr 3, 2021
ea5a68e
Compat: fix Sklearn validation bug
MuellerSeb Apr 3, 2021
1bb7dca
update .zenodo.json
MuellerSeb Apr 3, 2021
04c81ad
Merge pull request #184 from GeoStat-Framework/classification-kriging
MuellerSeb Apr 3, 2021
2b9dc05
update changelog
MuellerSeb Apr 3, 2021
bcb80f8
Doc: correctly use .md files in doc
MuellerSeb Apr 3, 2021
666137d
Doc: add latex logo
MuellerSeb Apr 3, 2021
e4224f8
Merge pull request #186 from GeoStat-Framework/doc_update
MuellerSeb Apr 3, 2021
5 changes: 5 additions & 0 deletions .zenodo.json
@@ -28,6 +28,10 @@
{
"type": "Other",
"name": "Daniel Mej\u00eda Raigosa"
},
{
"type": "Other",
"name": "Marcelo Albuquerque"
}
],
"language": "eng",
@@ -37,6 +41,7 @@
"universal kriging",
"external drift kriging",
"regression kriging",
"classification kriging",
"variogram",
"geostatistics",
"Python",
12 changes: 12 additions & 0 deletions README.rst
@@ -74,6 +74,7 @@ Kriging algorithms
* ``OrdinaryKriging3D``: 3D ordinary kriging
* ``UniversalKriging3D``: 3D universal kriging
* ``RegressionKriging``: An implementation of Regression-Kriging
* ``ClassificationKriging``: An implementation of Simplicial Indicator Kriging


Wrappers
@@ -109,6 +110,17 @@ the ``OrdinaryKriging`` or the ``UniversalKriging`` class, and performs a correc
A demonstration of the regression kriging is provided in the
`corresponding example <http://pykrige.readthedocs.io/en/latest/examples/07_regression_kriging2d.html#sphx-glr-examples-07-regression-kriging2d-py>`_.

Classification Kriging
----------------------

`Simplicial Indicator Kriging <https://www.sciencedirect.com/science/article/abs/pii/S1002070508600254>`_ can be performed
with `pykrige.ck.ClassificationKriging <http://pykrige.readthedocs.io/en/latest/examples/10_classification_kriging2d.html>`_.
This class takes as parameters a scikit-learn classification model and details of either
the ``OrdinaryKriging`` or the ``UniversalKriging`` class, and performs a correction step on the ML classification prediction.

A demonstration of classification kriging is provided in the
`corresponding example <http://pykrige.readthedocs.io/en/latest/examples/10_classification_kriging2d.html#sphx-glr-examples-10-classification-kriging2d-py>`_.
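
A minimal usage sketch (illustrative only, mirroring the bundled example; ``p_train``, ``x_train``
and ``y_train`` are placeholders for your features, coordinates and class labels, not part of the API)::

    from sklearn.svm import SVC
    from pykrige.ck import ClassificationKriging

    ck = ClassificationKriging(classification_model=SVC(probability=True), n_closest_points=10)
    ck.fit(p_train, x_train, y_train)    # features, (lon, lat) coordinates, class labels
    labels = ck.predict(p_test, x_test)  # ML prediction corrected by the kriged residuals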


License
^^^^^^^
2 changes: 1 addition & 1 deletion examples/07_regression_kriging2d.py
@@ -11,9 +11,9 @@
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split

from pykrige.rk import RegressionKriging
from pykrige.compat import train_test_split

svr_model = SVR(C=0.1, gamma="auto")
rf_model = RandomForestRegressor(n_estimators=100)
50 changes: 50 additions & 0 deletions examples/10_classification_kriging2d.py
@@ -0,0 +1,50 @@
"""
Classification kriging
----------------------

An example of classification kriging
"""

import sys

from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import fetch_california_housing
from sklearn.preprocessing import KBinsDiscretizer
from sklearn.model_selection import train_test_split

from pykrige.ck import ClassificationKriging

svc_model = SVC(C=0.1, gamma="auto", probability=True)
rf_model = RandomForestClassifier(n_estimators=100)
lr_model = LogisticRegression(max_iter=10000)

models = [svc_model, rf_model, lr_model]

try:
housing = fetch_california_housing()
except PermissionError:
# this dataset can occasionally fail to download on Windows
sys.exit(0)

# take the first 5000 as Kriging is memory intensive
p = housing["data"][:5000, :-2]
x = housing["data"][:5000, -2:]
target = housing["target"][:5000]
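# Discretize the continuous house-value target into ordinal classes so it can
# serve as a classification target (KBinsDiscretizer defaults to 5 bins).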
discretizer = KBinsDiscretizer(encode="ordinal")
target = discretizer.fit_transform(target.reshape(-1, 1))

p_train, p_test, x_train, x_test, target_train, target_test = train_test_split(
p, x, target, test_size=0.3, random_state=42
)

for m in models:
print("=" * 40)
print("classification model:", m.__class__.__name__)
m_ck = ClassificationKriging(classification_model=m, n_closest_points=10)
m_ck.fit(p_train, x_train, target_train)
print(
"Classification Score: ", m_ck.classification_model.score(p_test, target_test)
)
print("CK score: ", m_ck.score(p_test, x_test, target_test))
290 changes: 290 additions & 0 deletions pykrige/ck.py
@@ -0,0 +1,290 @@
# coding: utf-8
"""Classification Kriging."""
import numpy as np
from pykrige.compat import Krige, validate_sklearn, check_sklearn_model

validate_sklearn()

from sklearn.metrics import accuracy_score
from sklearn.svm import SVC
from sklearn.preprocessing import OneHotEncoder
from scipy.linalg import helmert


class ClassificationKriging:
"""
    An implementation of Simplicial Indicator Kriging, applied by kriging the ilr-transformed residuals of a classification model.

Parameters
----------
classification_model: machine learning model instance from sklearn
method: str, optional
type of kriging to be performed
variogram_model: str, optional
variogram model to be used during Kriging
n_closest_points: int
number of closest points to be used during Ordinary Kriging
nlags: int
see OK/UK class description
weight: bool
see OK/UK class description
verbose: bool
see OK/UK class description
exact_values : bool
see OK/UK class description
variogram_parameters : list or dict
see OK/UK class description
variogram_function : callable
see OK/UK class description
anisotropy_scaling : tuple
single value for 2D (UK/OK) and two values in 3D (UK3D/OK3D)
anisotropy_angle : tuple
single value for 2D (UK/OK) and three values in 3D (UK3D/OK3D)
enable_statistics : bool
see OK class description
coordinates_type : str
see OK/UK class description
drift_terms : list of strings
see UK/UK3D class description
point_drift : array_like
see UK class description
ext_drift_grid : tuple
Holding the three values external_drift, external_drift_x and
external_drift_z for the UK class
functional_drift : list of callable
see UK/UK3D class description
"""

def __init__(
self,
classification_model=SVC(),
method="ordinary",
variogram_model="linear",
n_closest_points=10,
nlags=6,
weight=False,
verbose=False,
exact_values=True,
pseudo_inv=False,
pseudo_inv_type="pinv",
variogram_parameters=None,
variogram_function=None,
anisotropy_scaling=(1.0, 1.0),
anisotropy_angle=(0.0, 0.0, 0.0),
enable_statistics=False,
coordinates_type="euclidean",
drift_terms=None,
point_drift=None,
ext_drift_grid=(None, None, None),
functional_drift=None,
):
check_sklearn_model(classification_model, task="classification")
self.classification_model = classification_model
self.n_closest_points = n_closest_points
self._kriging_kwargs = dict(
method=method,
variogram_model=variogram_model,
nlags=nlags,
weight=weight,
n_closest_points=n_closest_points,
verbose=verbose,
exact_values=exact_values,
pseudo_inv=pseudo_inv,
pseudo_inv_type=pseudo_inv_type,
variogram_parameters=variogram_parameters,
variogram_function=variogram_function,
anisotropy_scaling=anisotropy_scaling,
anisotropy_angle=anisotropy_angle,
enable_statistics=enable_statistics,
coordinates_type=coordinates_type,
drift_terms=drift_terms,
point_drift=point_drift,
ext_drift_grid=ext_drift_grid,
functional_drift=functional_drift,
)

def fit(self, p, x, y):
"""
Fit the classification method and also krige the residual.

Parameters
----------
p: ndarray
(Ns, d) array of predictor variables (Ns samples, d dimensions)
for classification
x: ndarray
            ndarray of (x, y) points, shape (Ns, 2), corresponding to the
            lon/lat for example, for 2D classification kriging, or of
            (x, y, z) points, shape (Ns, 3), for 3D kriging
y: ndarray
array of targets (Ns, )
"""
self.classification_model.fit(p, y.ravel())
print("Finished learning classification model")
self.classes_ = self.classification_model.classes_

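        # One Krige estimator per ilr coordinate: K classes give K-1
        # unconstrained components after the ilr transform.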
self.krige = []
for i in range(len(self.classes_) - 1):
self.krige.append(Krige(**self._kriging_kwargs))

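        # Work in ilr space: transform the predicted class probabilities and
        # the one-hot encoded targets, then krige the component-wise residuals.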
ml_pred = self.classification_model.predict_proba(p)
ml_pred_ilr = ilr_transformation(ml_pred)

self.onehotencode = OneHotEncoder(categories=[self.classes_])
y_ohe = np.array(self.onehotencode.fit_transform(y).todense())
y_ohe_ilr = ilr_transformation(y_ohe)

for i in range(len(self.classes_) - 1):
self.krige[i].fit(x=x, y=y_ohe_ilr[:, i] - ml_pred_ilr[:, i])

print("Finished kriging residuals")

def predict(self, p, x, **kwargs):
"""
Predict.

Parameters
----------
p: ndarray
(Ns, d) array of predictor variables (Ns samples, d dimensions)
for classification
x: ndarray
            ndarray of (x, y) points, shape (Ns, 2), corresponding to the
            lon/lat for example, for 2D kriging, or of (x, y, z) points,
            shape (Ns, 3), for 3D kriging

Returns
-------
pred: ndarray
            The predicted (most probable) class for each query point, of shape (Ns,).

"""

ml_pred = self.classification_model.predict_proba(p)
ml_pred_ilr = ilr_transformation(ml_pred)

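        # Add the kriged residuals in ilr space, then back-transform to class
        # probabilities and return the most probable class per sample.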
pred_proba_ilr = self.krige_residual(x, **kwargs) + ml_pred_ilr
pred_proba = inverse_ilr_transformation(pred_proba_ilr)

return np.argmax(pred_proba, axis=1)

def krige_residual(self, x, **kwargs):
"""
Calculate the residuals.

Parameters
----------
x: ndarray
ndarray of (x, y) points. Needs to be a (Ns, 2) array
corresponding to the lon/lat, for example.

Returns
-------
residual: ndarray
kriged residual values
"""

krig_pred = [
self.krige[i].predict(x=x, **kwargs) for i in range(len(self.classes_) - 1)
]

return np.vstack(krig_pred).T

def score(self, p, x, y, sample_weight=None, **kwargs):
"""
Overloading default classification score method.

Parameters
----------
p: ndarray
(Ns, d) array of predictor variables (Ns samples, d dimensions)
for classification
x: ndarray
            ndarray of (x, y) points, shape (Ns, 2), corresponding to the
            lon/lat for example, for 2D kriging, or of (x, y, z) points,
            shape (Ns, 3), for 3D kriging
y: ndarray
array of targets (Ns, )
"""
return accuracy_score(
y_pred=self.predict(p, x, **kwargs), y_true=y, sample_weight=sample_weight
)


def closure(data, k=1.0):
"""Apply closure to data, sample-wise.
Adapted from https://github.com/ofgulban/compoda.

Parameters
----------
data : 2d numpy array, shape [n_samples, n_measurements]
Data to be closed to a certain constant. Do not forget to deal with
zeros in the data before this operation.
k : float, positive
Sum of the measurements will be equal to this number.

Returns
-------
data : 2d numpy array, shape [n_samples, n_measurements]
Closed data.

Reference
---------
[1] Pawlowsky-Glahn, V., Egozcue, J. J., & Tolosana-Delgado, R.
(2015). Modelling and Analysis of Compositional Data, pg. 9.
Chichester, UK: John Wiley & Sons, Ltd.
DOI: 10.1002/9781119003144
"""

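    # Rescale each row (sample) so its entries sum to k, i.e. project it
    # back onto the simplex.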
return k * data / np.sum(data, axis=1)[:, np.newaxis]


def ilr_transformation(data):
"""Isometric logratio transformation (not vectorized).
Adapted from https://github.com/ofgulban/compoda.

Parameters
----------
data : 2d numpy array, shape [n_samples, n_coordinates]
Barycentric coordinates (closed) in simplex space.

Returns
-------
out : 2d numpy array, shape [n_samples, n_coordinates-1]
Coordinates in real space.

Reference
---------
[1] Pawlowsky-Glahn, V., Egozcue, J. J., & Tolosana-Delgado, R.
(2015). Modelling and Analysis of Compositional Data, pg. 37.
Chichester, UK: John Wiley & Sons, Ltd.
DOI: 10.1002/9781119003144
"""
data = np.maximum(data, np.finfo(float).eps)

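    # Project the log-data onto the (negated, transposed) Helmert contrast
    # matrix; its rows are orthogonal to the ones vector, so this yields the
    # D-1 ilr coordinates without explicitly forming the clr transform.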
return np.einsum("ij,jk->ik", np.log(data), -helmert(data.shape[1]).T)


def inverse_ilr_transformation(data):
"""Inverse isometric logratio transformation (not vectorized).
Adapted from https://github.com/ofgulban/compoda.

Parameters
----------
data : 2d numpy array, shape [n_samples, n_coordinates]
Isometric log-ratio transformed coordinates in real space.

Returns
-------
out : 2d numpy array, shape [n_samples, n_coordinates+1]
Barycentric coordinates (closed) in simplex space.

Reference
---------
[1] Pawlowsky-Glahn, V., Egozcue, J. J., & Tolosana-Delgado, R.
(2015). Modelling and Analysis of Compositional Data, pg. 37.
Chichester, UK: John Wiley & Sons, Ltd.
DOI: 10.1002/9781119003144
"""

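    # Map the ilr coordinates back through the Helmert basis, exponentiate,
    # and re-close the rows so they again sum to one (valid compositions).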
return closure(np.exp(np.einsum("ij,jk->ik", data, -helmert(data.shape[1] + 1))))
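
For readers unfamiliar with the compositional-data helpers above, a small standalone
sketch (not part of this pull request; the sample values are made up) of the round trip
that ``ClassificationKriging`` relies on:

    import numpy as np
    from pykrige.ck import closure, ilr_transformation, inverse_ilr_transformation

    # Two samples of three-class probabilities (rows lie on the simplex).
    proba = np.array([[0.7, 0.2, 0.1],
                      [0.1, 0.3, 0.6]])

    z = ilr_transformation(proba)         # shape (2, 2): unconstrained ilr coordinates
    back = inverse_ilr_transformation(z)  # shape (2, 3): back on the simplex

    # The round trip recovers the (closed) input probabilities.
    assert np.allclose(back, closure(proba))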