diff --git a/README.rst b/README.rst index 822de88..6da0083 100644 --- a/README.rst +++ b/README.rst @@ -74,6 +74,7 @@ Kriging algorithms * ``OrdinaryKriging3D``: 3D ordinary kriging * ``UniversalKriging3D``: 3D universal kriging * ``RegressionKriging``: An implementation of Regression-Kriging +* ``ClassificationKriging``: An implementation of Simplicial Indicator Kriging Wrappers @@ -109,6 +110,17 @@ the ``OrdinaryKriging`` or the ``UniversalKriging`` class, and performs a correc A demonstration of the regression kriging is provided in the `corresponding example `_. +Classification Kriging +---------------------- + +`Simplifical Indicator kriging `_ can be performed +with `pykrige.rk.ClassificationKriging `_. +This class takes as parameters a scikit-learn classification model, and details of either +the ``OrdinaryKriging`` or the ``UniversalKriging`` class, and performs a correction steps on the ML classification prediction. + +A demonstration of the classification kriging is provided in the +`corresponding example `_. + License ^^^^^^^ diff --git a/examples/10_classification_kriging2d.py b/examples/10_classification_kriging2d.py new file mode 100644 index 0000000..aba2956 --- /dev/null +++ b/examples/10_classification_kriging2d.py @@ -0,0 +1,50 @@ +""" +Classification kriging +------------------ + +An example of classification kriging +""" + +import sys + +from sklearn.svm import SVC +from sklearn.ensemble import RandomForestClassifier +from sklearn.linear_model import LogisticRegression +from sklearn.datasets import fetch_california_housing +from sklearn.preprocessing import KBinsDiscretizer + +from pykrige.ck import ClassificationKriging +from pykrige.compat import train_test_split + +svc_model = SVC(C=0.1, gamma="auto", probability=True) +rf_model = RandomForestClassifier(n_estimators=100) +lr_model = LogisticRegression(max_iter=10000) + +models = [svc_model, rf_model, lr_model] + +try: + housing = fetch_california_housing() +except PermissionError: + # this dataset can occasionally fail to download on Windows + sys.exit(0) + +# take the first 5000 as Kriging is memory intensive +p = housing["data"][:5000, :-2] +x = housing["data"][:5000, -2:] +target = housing["target"][:5000] +discretizer = KBinsDiscretizer(encode="ordinal") +target = discretizer.fit_transform(target.reshape(-1, 1)) + +p_train, p_test, x_train, x_test, target_train, target_test = train_test_split( + p, x, target, test_size=0.3, random_state=42 +) + +for m in models: + print("=" * 40) + print("classification model:", m.__class__.__name__) + m_ck = ClassificationKriging(classification_model=m, n_closest_points=10) + m_ck.fit(p_train, x_train, target_train) + print( + "Classification Score: ", m_ck.classification_model.score(p_test, target_test) + ) + print("CK score: ", m_ck.score(p_test, x_test, target_test)) diff --git a/pykrige/ck.py b/pykrige/ck.py new file mode 100644 index 0000000..feaa771 --- /dev/null +++ b/pykrige/ck.py @@ -0,0 +1,289 @@ +# coding: utf-8 +from pykrige.compat import Krige, validate_sklearn, check_sklearn_model + +validate_sklearn() + +from sklearn.metrics import r2_score, accuracy_score +from sklearn.svm import SVC +from sklearn.preprocessing import OneHotEncoder +import numpy as np +from scipy.linalg import helmert + + +class ClassificationKriging: + """ + An implementation of Simplicial Indicator Kriging applied to classification ilr transformed residuals. + + Parameters + ---------- + classification_model: machine learning model instance from sklearn + method: str, optional + type of kriging to be performed + variogram_model: str, optional + variogram model to be used during Kriging + n_closest_points: int + number of closest points to be used during Ordinary Kriging + nlags: int + see OK/UK class description + weight: bool + see OK/UK class description + verbose: bool + see OK/UK class description + exact_values : bool + see OK/UK class description + variogram_parameters : list or dict + see OK/UK class description + variogram_function : callable + see OK/UK class description + anisotropy_scaling : tuple + single value for 2D (UK/OK) and two values in 3D (UK3D/OK3D) + anisotropy_angle : tuple + single value for 2D (UK/OK) and three values in 3D (UK3D/OK3D) + enable_statistics : bool + see OK class description + coordinates_type : str + see OK/UK class description + drift_terms : list of strings + see UK/UK3D class description + point_drift : array_like + see UK class description + ext_drift_grid : tuple + Holding the three values external_drift, external_drift_x and + external_drift_z for the UK class + functional_drift : list of callable + see UK/UK3D class description + """ + + def __init__( + self, + classification_model=SVC(), + method="ordinary", + variogram_model="linear", + n_closest_points=10, + nlags=6, + weight=False, + verbose=False, + exact_values=True, + pseudo_inv=False, + pseudo_inv_type="pinv", + variogram_parameters=None, + variogram_function=None, + anisotropy_scaling=(1.0, 1.0), + anisotropy_angle=(0.0, 0.0, 0.0), + enable_statistics=False, + coordinates_type="euclidean", + drift_terms=None, + point_drift=None, + ext_drift_grid=(None, None, None), + functional_drift=None, + ): + check_sklearn_model(classification_model, task="classification") + self.classification_model = classification_model + self.n_closest_points = n_closest_points + self._kriging_kwargs = dict( + method=method, + variogram_model=variogram_model, + nlags=nlags, + weight=weight, + n_closest_points=n_closest_points, + verbose=verbose, + exact_values=exact_values, + pseudo_inv=pseudo_inv, + pseudo_inv_type=pseudo_inv_type, + variogram_parameters=variogram_parameters, + variogram_function=variogram_function, + anisotropy_scaling=anisotropy_scaling, + anisotropy_angle=anisotropy_angle, + enable_statistics=enable_statistics, + coordinates_type=coordinates_type, + drift_terms=drift_terms, + point_drift=point_drift, + ext_drift_grid=ext_drift_grid, + functional_drift=functional_drift, + ) + + def fit(self, p, x, y): + """ + Fit the classification method and also krige the residual. + + Parameters + ---------- + p: ndarray + (Ns, d) array of predictor variables (Ns samples, d dimensions) + for classification + x: ndarray + ndarray of (x, y) points. Needs to be a (Ns, 2) array + corresponding to the lon/lat, for example 2d classification kriging. + array of Points, (x, y, z) pairs of shape (N, 3) for 3d kriging + y: ndarray + array of targets (Ns, ) + """ + self.classification_model.fit(p, y.ravel()) + print("Finished learning classification model") + self.classes_ = self.classification_model.classes_ + + self.krige = [] + for i in range(len(self.classes_) - 1): + self.krige.append(Krige(**self._kriging_kwargs)) + + ml_pred = self.classification_model.predict_proba(p) + ml_pred_ilr = ilr_transformation(ml_pred) + + self.onehotencode = OneHotEncoder(categories=[self.classes_]) + y_ohe = np.array(self.onehotencode.fit_transform(y).todense()) + y_ohe_ilr = ilr_transformation(y_ohe) + + for i in range(len(self.classes_) - 1): + self.krige[i].fit(x=x, y=y_ohe_ilr[:, i] - ml_pred_ilr[:, i]) + + print("Finished kriging residuals") + + def predict(self, p, x, **kwargs): + """ + Predict. + + Parameters + ---------- + p: ndarray + (Ns, d) array of predictor variables (Ns samples, d dimensions) + for classification + x: ndarray + ndarray of (x, y) points. Needs to be a (Ns, 2) array + corresponding to the lon/lat, for example. + array of Points, (x, y, z) pairs of shape (N, 3) for 3d kriging + + Returns + ------- + pred: ndarray + The expected value of ys for the query inputs, of shape (Ns,). + + """ + + ml_pred = self.classification_model.predict_proba(p) + ml_pred_ilr = ilr_transformation(ml_pred) + + pred_proba_ilr = self.krige_residual(x, **kwargs) + ml_pred_ilr + pred_proba = inverse_ilr_transformation(pred_proba_ilr) + + return np.argmax(pred_proba, axis=1) + + def krige_residual(self, x, **kwargs): + """ + Calculate the residuals. + + Parameters + ---------- + x: ndarray + ndarray of (x, y) points. Needs to be a (Ns, 2) array + corresponding to the lon/lat, for example. + + Returns + ------- + residual: ndarray + kriged residual values + """ + + krig_pred = [ + self.krige[i].predict(x=x, **kwargs) for i in range(len(self.classes_) - 1) + ] + + return np.vstack(krig_pred).T + + def score(self, p, x, y, sample_weight=None, **kwargs): + """ + Overloading default classification score method. + + Parameters + ---------- + p: ndarray + (Ns, d) array of predictor variables (Ns samples, d dimensions) + for classification + x: ndarray + ndarray of (x, y) points. Needs to be a (Ns, 2) array + corresponding to the lon/lat, for example. + array of Points, (x, y, z) pairs of shape (N, 3) for 3d kriging + y: ndarray + array of targets (Ns, ) + """ + return accuracy_score( + y_pred=self.predict(p, x, **kwargs), y_true=y, sample_weight=sample_weight + ) + + +def closure(data, k=1.0): + """Apply closure to data, sample-wise. + Adapted from https://github.com/ofgulban/compoda. + + Parameters + ---------- + data : 2d numpy array, shape [n_samples, n_measurements] + Data to be closed to a certain constant. Do not forget to deal with + zeros in the data before this operation. + k : float, positive + Sum of the measurements will be equal to this number. + + Returns + ------- + data : 2d numpy array, shape [n_samples, n_measurements] + Closed data. + + Reference + --------- + [1] Pawlowsky-Glahn, V., Egozcue, J. J., & Tolosana-Delgado, R. + (2015). Modelling and Analysis of Compositional Data, pg. 9. + Chichester, UK: John Wiley & Sons, Ltd. + DOI: 10.1002/9781119003144 + """ + + return k * data / np.sum(data, axis=1)[:, np.newaxis] + + +def ilr_transformation(data): + """Isometric logratio transformation (not vectorized). + Adapted from https://github.com/ofgulban/compoda. + + Parameters + ---------- + data : 2d numpy array, shape [n_samples, n_coordinates] + Barycentric coordinates (closed) in simplex space. + + Returns + ------- + out : 2d numpy array, shape [n_samples, n_coordinates-1] + Coordinates in real space. + + Reference + --------- + [1] Pawlowsky-Glahn, V., Egozcue, J. J., & Tolosana-Delgado, R. + (2015). Modelling and Analysis of Compositional Data, pg. 37. + Chichester, UK: John Wiley & Sons, Ltd. + DOI: 10.1002/9781119003144 + """ + data = np.maximum(data, np.finfo(float).eps) + + return np.einsum("ij,jk->ik", np.log(data), -helmert(data.shape[1]).T) + + +def inverse_ilr_transformation(data): + """Inverse isometric logratio transformation (not vectorized). + Adapted from https://github.com/ofgulban/compoda. + + Parameters + ---------- + data : 2d numpy array, shape [n_samples, n_coordinates] + Isometric log-ratio transformed coordinates in real space. + + Returns + ------- + out : 2d numpy array, shape [n_samples, n_coordinates+1] + Barycentric coordinates (closed) in simplex space. + + Reference + --------- + [1] Pawlowsky-Glahn, V., Egozcue, J. J., & Tolosana-Delgado, R. + (2015). Modelling and Analysis of Compositional Data, pg. 37. + Chichester, UK: John Wiley & Sons, Ltd. + DOI: 10.1002/9781119003144 + """ + + return closure(np.exp(np.einsum("ij,jk->ik", data, -helmert(data.shape[1] + 1)))) diff --git a/pykrige/compat.py b/pykrige/compat.py index 73434aa..d3847e4 100644 --- a/pykrige/compat.py +++ b/pykrige/compat.py @@ -3,24 +3,294 @@ """For compatibility""" from functools import partial +from pykrige.uk3d import UniversalKriging3D +from pykrige.ok3d import OrdinaryKriging3D +from pykrige.uk import UniversalKriging +from pykrige.ok import OrdinaryKriging # sklearn try: from sklearn.model_selection import GridSearchCV from sklearn.model_selection import train_test_split + from sklearn.base import RegressorMixin, ClassifierMixin, BaseEstimator SKLEARN_INSTALLED = True except ImportError: SKLEARN_INSTALLED = False +krige_methods = { + "ordinary": OrdinaryKriging, + "universal": UniversalKriging, + "ordinary3d": OrdinaryKriging3D, + "universal3d": UniversalKriging3D, +} + +threed_krige = ("ordinary3d", "universal3d") + +krige_methods_kws = { + "ordinary": [ + "anisotropy_scaling", + "anisotropy_angle", + "enable_statistics", + "coordinates_type", + ], + "universal": [ + "anisotropy_scaling", + "anisotropy_angle", + "drift_terms", + "point_drift", + "external_drift", + "external_drift_x", + "external_drift_y", + "functional_drift", + ], + "ordinary3d": [ + "anisotropy_scaling_y", + "anisotropy_scaling_z", + "anisotropy_angle_x", + "anisotropy_angle_y", + "anisotropy_angle_z", + ], + "universal3d": [ + "anisotropy_scaling_y", + "anisotropy_scaling_z", + "anisotropy_angle_x", + "anisotropy_angle_y", + "anisotropy_angle_z", + "drift_terms", + "functional_drift", + ], +} + class SklearnException(Exception): pass +def validate_method(method): + """Validate the kriging method in use.""" + if method not in krige_methods.keys(): + raise ValueError( + "Kriging method must be one of {}".format(krige_methods.keys()) + ) + + def validate_sklearn(): if not SKLEARN_INSTALLED: raise SklearnException( "sklearn needs to be installed in order to use this module" ) + + +class Krige(RegressorMixin, BaseEstimator): + """ + A scikit-learn wrapper class for Ordinary and Universal Kriging. + + This works with both Grid/RandomSearchCv for finding the best + Krige parameters combination for a problem. + + Parameters + ---------- + method: str, optional + type of kriging to be performed + variogram_model: str, optional + variogram model to be used during Kriging + nlags: int + see OK/UK class description + weight: bool + see OK/UK class description + n_closest_points: int + number of closest points to be used during Ordinary Kriging + verbose: bool + see OK/UK class description + exact_values : bool + see OK/UK class description + variogram_parameters : list or dict + see OK/UK class description + variogram_function : callable + see OK/UK class description + anisotropy_scaling : tuple + single value for 2D (UK/OK) and two values in 3D (UK3D/OK3D) + anisotropy_angle : tuple + single value for 2D (UK/OK) and three values in 3D (UK3D/OK3D) + enable_statistics : bool + see OK class description + coordinates_type : str + see OK/UK class description + drift_terms : list of strings + see UK/UK3D class description + point_drift : array_like + see UK class description + ext_drift_grid : tuple + Holding the three values external_drift, external_drift_x and + external_drift_z for the UK class + functional_drift : list of callable + see UK/UK3D class description + """ + + def __init__( + self, + method="ordinary", + variogram_model="linear", + nlags=6, + weight=False, + n_closest_points=10, + verbose=False, + exact_values=True, + pseudo_inv=False, + pseudo_inv_type="pinv", + variogram_parameters=None, + variogram_function=None, + anisotropy_scaling=(1.0, 1.0), + anisotropy_angle=(0.0, 0.0, 0.0), + enable_statistics=False, + coordinates_type="euclidean", + drift_terms=None, + point_drift=None, + ext_drift_grid=(None, None, None), + functional_drift=None, + ): + validate_method(method) + self.variogram_model = variogram_model + self.variogram_parameters = variogram_parameters + self.variogram_function = variogram_function + self.nlags = nlags + self.weight = weight + self.verbose = verbose + self.exact_values = exact_values + self.pseudo_inv = pseudo_inv + self.pseudo_inv_type = pseudo_inv_type + self.anisotropy_scaling = anisotropy_scaling + self.anisotropy_angle = anisotropy_angle + self.enable_statistics = enable_statistics + self.coordinates_type = coordinates_type + self.drift_terms = drift_terms + self.point_drift = point_drift + self.ext_drift_grid = ext_drift_grid + self.functional_drift = functional_drift + self.model = None # not trained + self.n_closest_points = n_closest_points + self.method = method + + def fit(self, x, y, *args, **kwargs): + """ + Fit the current model. + + Parameters + ---------- + x: ndarray + array of Points, (x, y) pairs of shape (N, 2) for 2d kriging + array of Points, (x, y, z) pairs of shape (N, 3) for 3d kriging + y: ndarray + array of targets (N, ) + """ + val_kw = "val" if self.method in threed_krige else "z" + setup = dict( + variogram_model=self.variogram_model, + variogram_parameters=self.variogram_parameters, + variogram_function=self.variogram_function, + nlags=self.nlags, + weight=self.weight, + verbose=self.verbose, + exact_values=self.exact_values, + pseudo_inv=self.pseudo_inv, + pseudo_inv_type=self.pseudo_inv_type, + ) + add_setup = dict( + anisotropy_scaling=self.anisotropy_scaling[0], + anisotropy_angle=self.anisotropy_angle[0], + enable_statistics=self.enable_statistics, + coordinates_type=self.coordinates_type, + anisotropy_scaling_y=self.anisotropy_scaling[0], + anisotropy_scaling_z=self.anisotropy_scaling[1], + anisotropy_angle_x=self.anisotropy_angle[0], + anisotropy_angle_y=self.anisotropy_angle[1], + anisotropy_angle_z=self.anisotropy_angle[2], + drift_terms=self.drift_terms, + point_drift=self.point_drift, + external_drift=self.ext_drift_grid[0], + external_drift_x=self.ext_drift_grid[1], + external_drift_y=self.ext_drift_grid[2], + functional_drift=self.functional_drift, + ) + for kw in krige_methods_kws[self.method]: + setup[kw] = add_setup[kw] + input_kw = self._dimensionality_check(x) + input_kw.update(setup) + input_kw[val_kw] = y + self.model = krige_methods[self.method](**input_kw) + + def _dimensionality_check(self, x, ext=""): + if self.method in ("ordinary", "universal"): + if x.shape[1] != 2: + raise ValueError("2d krige can use only 2d points") + else: + return {"x" + ext: x[:, 0], "y" + ext: x[:, 1]} + if self.method in ("ordinary3d", "universal3d"): + if x.shape[1] != 3: + raise ValueError("3d krige can use only 3d points") + else: + return { + "x" + ext: x[:, 0], + "y" + ext: x[:, 1], + "z" + ext: x[:, 2], + } + + def predict(self, x, *args, **kwargs): + """ + Predict. + + Parameters + ---------- + x: ndarray + array of Points, (x, y) pairs of shape (N, 2) for 2d kriging + array of Points, (x, y, z) pairs of shape (N, 3) for 3d kriging + Returns + ------- + Prediction array + """ + if not self.model: + raise Exception("Not trained. Train first") + points = self._dimensionality_check(x, ext="points") + return self.execute(points, *args, **kwargs)[0] + + def execute(self, points, *args, **kwargs): + # TODO array of Points, (x, y) pairs of shape (N, 2) + """ + Execute. + + Parameters + ---------- + points: dict + + Returns + ------- + Prediction array + Variance array + """ + default_kw = dict(style="points", backend="loop") + default_kw.update(kwargs) + points.update(default_kw) + if isinstance(self.model, (OrdinaryKriging, OrdinaryKriging3D)): + points.update(dict(n_closest_points=self.n_closest_points)) + else: + print("n_closest_points will be ignored for UniversalKriging") + prediction, variance = self.model.execute(**points) + return prediction, variance + + +def check_sklearn_model(model, task="regression"): + """Check the sklearn method in use.""" + if task == "regression": + if not (isinstance(model, BaseEstimator) and isinstance(model, RegressorMixin)): + raise RuntimeError( + "Needs to supply an instance of a scikit-learn regression class." + ) + elif task == "classification": + if not ( + isinstance(model, BaseEstimator) and isinstance(model, ClassifierMixin) + ): + raise RuntimeError( + "Needs to supply an instance of a scikit-learn classification class." + ) diff --git a/pykrige/rk.py b/pykrige/rk.py index e7b4760..7e6821b 100644 --- a/pykrige/rk.py +++ b/pykrige/rk.py @@ -1,271 +1,10 @@ # coding: utf-8 -from pykrige.compat import validate_sklearn +from pykrige.compat import Krige, validate_sklearn, check_sklearn_model validate_sklearn() -from pykrige.ok import OrdinaryKriging -from pykrige.uk import UniversalKriging -from pykrige.ok3d import OrdinaryKriging3D -from pykrige.uk3d import UniversalKriging3D -from sklearn.base import RegressorMixin, BaseEstimator -from sklearn.svm import SVR -from sklearn.metrics import r2_score - -krige_methods = { - "ordinary": OrdinaryKriging, - "universal": UniversalKriging, - "ordinary3d": OrdinaryKriging3D, - "universal3d": UniversalKriging3D, -} - -threed_krige = ("ordinary3d", "universal3d") - -krige_methods_kws = { - "ordinary": [ - "anisotropy_scaling", - "anisotropy_angle", - "enable_statistics", - "coordinates_type", - ], - "universal": [ - "anisotropy_scaling", - "anisotropy_angle", - "drift_terms", - "point_drift", - "external_drift", - "external_drift_x", - "external_drift_y", - "functional_drift", - ], - "ordinary3d": [ - "anisotropy_scaling_y", - "anisotropy_scaling_z", - "anisotropy_angle_x", - "anisotropy_angle_y", - "anisotropy_angle_z", - ], - "universal3d": [ - "anisotropy_scaling_y", - "anisotropy_scaling_z", - "anisotropy_angle_x", - "anisotropy_angle_y", - "anisotropy_angle_z", - "drift_terms", - "functional_drift", - ], -} - - -def validate_method(method): - """Validate the kriging method in use.""" - if method not in krige_methods.keys(): - raise ValueError( - "Kriging method must be one of {}".format(krige_methods.keys()) - ) - -class Krige(RegressorMixin, BaseEstimator): - """ - A scikit-learn wrapper class for Ordinary and Universal Kriging. - - This works with both Grid/RandomSearchCv for finding the best - Krige parameters combination for a problem. - - Parameters - ---------- - method: str, optional - type of kriging to be performed - variogram_model: str, optional - variogram model to be used during Kriging - nlags: int - see OK/UK class description - weight: bool - see OK/UK class description - n_closest_points: int - number of closest points to be used during Ordinary Kriging - verbose: bool - see OK/UK class description - exact_values : bool - see OK/UK class description - variogram_parameters : list or dict - see OK/UK class description - variogram_function : callable - see OK/UK class description - anisotropy_scaling : tuple - single value for 2D (UK/OK) and two values in 3D (UK3D/OK3D) - anisotropy_angle : tuple - single value for 2D (UK/OK) and three values in 3D (UK3D/OK3D) - enable_statistics : bool - see OK class description - coordinates_type : str - see OK/UK class description - drift_terms : list of strings - see UK/UK3D class description - point_drift : array_like - see UK class description - ext_drift_grid : tuple - Holding the three values external_drift, external_drift_x and - external_drift_z for the UK class - functional_drift : list of callable - see UK/UK3D class description - """ - - def __init__( - self, - method="ordinary", - variogram_model="linear", - nlags=6, - weight=False, - n_closest_points=10, - verbose=False, - exact_values=True, - pseudo_inv=False, - pseudo_inv_type="pinv", - variogram_parameters=None, - variogram_function=None, - anisotropy_scaling=(1.0, 1.0), - anisotropy_angle=(0.0, 0.0, 0.0), - enable_statistics=False, - coordinates_type="euclidean", - drift_terms=None, - point_drift=None, - ext_drift_grid=(None, None, None), - functional_drift=None, - ): - validate_method(method) - self.variogram_model = variogram_model - self.variogram_parameters = variogram_parameters - self.variogram_function = variogram_function - self.nlags = nlags - self.weight = weight - self.verbose = verbose - self.exact_values = exact_values - self.pseudo_inv = pseudo_inv - self.pseudo_inv_type = pseudo_inv_type - self.anisotropy_scaling = anisotropy_scaling - self.anisotropy_angle = anisotropy_angle - self.enable_statistics = enable_statistics - self.coordinates_type = coordinates_type - self.drift_terms = drift_terms - self.point_drift = point_drift - self.ext_drift_grid = ext_drift_grid - self.functional_drift = functional_drift - self.model = None # not trained - self.n_closest_points = n_closest_points - self.method = method - - def fit(self, x, y, *args, **kwargs): - """ - Fit the current model. - - Parameters - ---------- - x: ndarray - array of Points, (x, y) pairs of shape (N, 2) for 2d kriging - array of Points, (x, y, z) pairs of shape (N, 3) for 3d kriging - y: ndarray - array of targets (N, ) - """ - val_kw = "val" if self.method in threed_krige else "z" - setup = dict( - variogram_model=self.variogram_model, - variogram_parameters=self.variogram_parameters, - variogram_function=self.variogram_function, - nlags=self.nlags, - weight=self.weight, - verbose=self.verbose, - exact_values=self.exact_values, - pseudo_inv=self.pseudo_inv, - pseudo_inv_type=self.pseudo_inv_type, - ) - add_setup = dict( - anisotropy_scaling=self.anisotropy_scaling[0], - anisotropy_angle=self.anisotropy_angle[0], - enable_statistics=self.enable_statistics, - coordinates_type=self.coordinates_type, - anisotropy_scaling_y=self.anisotropy_scaling[0], - anisotropy_scaling_z=self.anisotropy_scaling[1], - anisotropy_angle_x=self.anisotropy_angle[0], - anisotropy_angle_y=self.anisotropy_angle[1], - anisotropy_angle_z=self.anisotropy_angle[2], - drift_terms=self.drift_terms, - point_drift=self.point_drift, - external_drift=self.ext_drift_grid[0], - external_drift_x=self.ext_drift_grid[1], - external_drift_y=self.ext_drift_grid[2], - functional_drift=self.functional_drift, - ) - for kw in krige_methods_kws[self.method]: - setup[kw] = add_setup[kw] - input_kw = self._dimensionality_check(x) - input_kw.update(setup) - input_kw[val_kw] = y - self.model = krige_methods[self.method](**input_kw) - - def _dimensionality_check(self, x, ext=""): - if self.method in ("ordinary", "universal"): - if x.shape[1] != 2: - raise ValueError("2d krige can use only 2d points") - else: - return {"x" + ext: x[:, 0], "y" + ext: x[:, 1]} - if self.method in ("ordinary3d", "universal3d"): - if x.shape[1] != 3: - raise ValueError("3d krige can use only 3d points") - else: - return { - "x" + ext: x[:, 0], - "y" + ext: x[:, 1], - "z" + ext: x[:, 2], - } - - def predict(self, x, *args, **kwargs): - """ - Predict. - - Parameters - ---------- - x: ndarray - array of Points, (x, y) pairs of shape (N, 2) for 2d kriging - array of Points, (x, y, z) pairs of shape (N, 3) for 3d kriging - Returns - ------- - Prediction array - """ - if not self.model: - raise Exception("Not trained. Train first") - points = self._dimensionality_check(x, ext="points") - return self.execute(points, *args, **kwargs)[0] - - def execute(self, points, *args, **kwargs): - # TODO array of Points, (x, y) pairs of shape (N, 2) - """ - Execute. - - Parameters - ---------- - points: dict - - Returns - ------- - Prediction array - Variance array - """ - default_kw = dict(style="points", backend="loop") - default_kw.update(kwargs) - points.update(default_kw) - if isinstance(self.model, (OrdinaryKriging, OrdinaryKriging3D)): - points.update(dict(n_closest_points=self.n_closest_points)) - else: - print("n_closest_points will be ignored for UniversalKriging") - prediction, variance = self.model.execute(**points) - return prediction, variance - - -def check_sklearn_model(model): - """Check the sklearn method in use.""" - if not (isinstance(model, BaseEstimator) and isinstance(model, RegressorMixin)): - raise RuntimeError( - "Needs to supply an instance of a scikit-learn regression class." - ) +from sklearn.metrics import r2_score, accuracy_score +from sklearn.svm import SVR class RegressionKriging: diff --git a/pykrige/uk.py b/pykrige/uk.py index 707cf34..6b6863d 100644 --- a/pykrige/uk.py +++ b/pykrige/uk.py @@ -969,7 +969,8 @@ def _exec_vector(self, a, bd, xy, xy_orig, mask, n_withdrifts, spec_drift_grids) i += 1 if i != n_withdrifts: warnings.warn( - "Error in setting up kriging system. Kriging may fail.", RuntimeWarning, + "Error in setting up kriging system. Kriging may fail.", + RuntimeWarning, ) if self.UNBIAS: b[:, n_withdrifts, 0] = 1.0 diff --git a/pykrige/uk3d.py b/pykrige/uk3d.py index 3fe8813..115893c 100644 --- a/pykrige/uk3d.py +++ b/pykrige/uk3d.py @@ -778,7 +778,8 @@ def _exec_vector(self, a, bd, xyz, mask, n_withdrifts, spec_drift_grids): i += 1 if i != n_withdrifts: warnings.warn( - "Error in setting up kriging system. Kriging may fail.", RuntimeWarning, + "Error in setting up kriging system. Kriging may fail.", + RuntimeWarning, ) if self.UNBIAS: b[:, n_withdrifts, 0] = 1.0 diff --git a/tests/test_api.py b/tests/test_api.py index 98d2de0..6c033ca 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -3,8 +3,8 @@ import numpy as np import pytest -from pykrige.rk import Krige -from pykrige.rk import threed_krige +from pykrige.compat import Krige +from pykrige.compat import threed_krige def _method_and_vergiogram(): diff --git a/tests/test_classification_krige.py b/tests/test_classification_krige.py new file mode 100644 index 0000000..429a06e --- /dev/null +++ b/tests/test_classification_krige.py @@ -0,0 +1,101 @@ +from itertools import product +import pytest + +import numpy as np + +from pykrige.ck import ClassificationKriging + +try: + from sklearn.svm import SVC + from sklearn.datasets import fetch_california_housing + from sklearn.ensemble import RandomForestClassifier + from sklearn.preprocessing import KBinsDiscretizer + from pykrige.compat import train_test_split + + SKLEARN_INSTALLED = True +except ImportError: + SKLEARN_INSTALLED = False + + +def _methods(): + krige_methods = ["ordinary", "universal"] + ml_methods = [ + SVC(C=0.01, gamma="auto", probability=True), + RandomForestClassifier(n_estimators=50), + ] + return product(ml_methods, krige_methods) + + +@pytest.mark.skipif(not SKLEARN_INSTALLED, reason="requires scikit-learn") +def test_classification_krige(): + np.random.seed(1) + x = np.linspace(-1.0, 1.0, 100) + # create a feature matrix with 5 features + X = np.tile(x, reps=(5, 1)).T + y = ( + 1 + + 5 * X[:, 0] + - 2 * X[:, 1] + - 2 * X[:, 2] + + 3 * X[:, 3] + + 4 * X[:, 4] + + 2 * (np.random.rand(100) - 0.5) + ) + + # create lat/lon array + lon = np.linspace(-180.0, 180.0, 10) + lat = np.linspace(-90.0, 90.0, 10) + lon_lat = np.array(list(product(lon, lat))) + + discretizer = KBinsDiscretizer(encode="ordinal") + y = discretizer.fit_transform(y.reshape(-1, 1)) + + X_train, X_test, y_train, y_test, lon_lat_train, lon_lat_test = train_test_split( + X, y, lon_lat, train_size=0.7, random_state=10 + ) + + for ml_model, krige_method in _methods(): + class_model = ClassificationKriging( + classification_model=ml_model, method=krige_method, n_closest_points=2 + ) + class_model.fit(X_train, lon_lat_train, y_train) + assert class_model.score(X_test, lon_lat_test, y_test) > 0.25 + + +@pytest.mark.skipif(not SKLEARN_INSTALLED, reason="requires scikit-learn") +def test_krige_classification_housing(): + import ssl + import urllib + + try: + housing = fetch_california_housing() + except (ssl.SSLError, urllib.error.URLError): + ssl._create_default_https_context = ssl._create_unverified_context + try: + housing = fetch_california_housing() + except PermissionError: + # This can raise permission error on Appveyor + pytest.skip("Failed to load california housing dataset") + ssl._create_default_https_context = ssl.create_default_context + + # take only first 1000 + p = housing["data"][:1000, :-2] + x = housing["data"][:1000, -2:] + target = housing["target"][:1000] + discretizer = KBinsDiscretizer(encode="ordinal") + target = discretizer.fit_transform(target.reshape(-1, 1)) + + p_train, p_test, y_train, y_test, x_train, x_test = train_test_split( + p, target, x, train_size=0.7, random_state=10 + ) + + for ml_model, krige_method in _methods(): + + class_model = ClassificationKriging( + classification_model=ml_model, method=krige_method, n_closest_points=2 + ) + class_model.fit(p_train, x_train, y_train) + if krige_method == "ordinary": + assert class_model.score(p_test, x_test, y_test) > 0.5 + else: + assert class_model.score(p_test, x_test, y_test) > 0.0 diff --git a/tests/test_core.py b/tests/test_core.py index cba4e00..6b07f7d 100755 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -439,7 +439,12 @@ def test_core_krige_3d(): def test_non_exact(): # custom data for this test data = np.array( - [[0.0, 0.0, 0.47], [1.5, 1.5, 0.56], [3, 3, 0.74], [4.5, 4.5, 1.47],] + [ + [0.0, 0.0, 0.47], + [1.5, 1.5, 0.56], + [3, 3, 0.74], + [4.5, 4.5, 1.47], + ] ) # construct grid points so diagonal