diff --git a/python/pyspark/ml/base.py b/python/pyspark/ml/base.py
index a7a58e17a43e..d502bb181959 100644
--- a/python/pyspark/ml/base.py
+++ b/python/pyspark/ml/base.py
@@ -116,3 +116,17 @@ class Model(Transformer):
     """

     __metaclass__ = ABCMeta
+
+
+class HasNumFeaturesModel(object):
+    """
+    Mixin that provides a getter for the number of features a model was trained on.
+    It is meant to be mixed in with JavaModel subclasses.
+    """
+    @property
+    @since("1.7.0")
+    def numFeatures(self):
+        """
+        The number of features used to train the model.
+        """
+        return self._call_java("numFeatures")
diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py
index 067009559b6f..ddbca2e39b51 100644
--- a/python/pyspark/ml/classification.py
+++ b/python/pyspark/ml/classification.py
@@ -18,6 +18,7 @@
 import warnings

 from pyspark import since
+from pyspark.ml.base import *
 from pyspark.ml.util import *
 from pyspark.ml.wrapper import JavaEstimator, JavaModel
 from pyspark.ml.param import TypeConverters
@@ -200,7 +201,7 @@ def _checkThresholdConsistency(self):
                              " threshold (%g) and thresholds (equivalent to %g)" % (t2, t))


-class LogisticRegressionModel(JavaModel, JavaMLWritable, JavaMLReadable):
+class LogisticRegressionModel(HasNumFeaturesModel, JavaModel, JavaMLWritable, JavaMLReadable):
     """
     Model fitted by LogisticRegression.

@@ -324,6 +325,8 @@ class DecisionTreeClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPred
     >>> model2 = DecisionTreeClassificationModel.load(model_path)
     >>> model.featureImportances == model2.featureImportances
     True
+    >>> model.numFeatures
+    1

     .. versionadded:: 1.4.0
     """
@@ -373,7 +376,8 @@ def _create_model(self, java_model):


 @inherit_doc
-class DecisionTreeClassificationModel(DecisionTreeModel, JavaMLWritable, JavaMLReadable):
+class DecisionTreeClassificationModel(HasNumFeaturesModel, DecisionTreeModel, JavaMLWritable,
+                                      JavaMLReadable):
     """
     Model fitted by DecisionTreeClassifier.

@@ -439,6 +443,8 @@ class RandomForestClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPred
     >>> test1 = sqlContext.createDataFrame([(Vectors.sparse(1, [0], [1.0]),)], ["features"])
     >>> model.transform(test1).head().prediction
     1.0
+    >>> model.numFeatures
+    1

     .. versionadded:: 1.4.0
     """
@@ -487,7 +493,7 @@ def _create_model(self, java_model):
         return RandomForestClassificationModel(java_model)


-class RandomForestClassificationModel(TreeEnsembleModels):
+class RandomForestClassificationModel(HasNumFeaturesModel, TreeEnsembleModels):
     """
     Model fitted by RandomForestClassifier.

@@ -540,6 +546,8 @@ class GBTClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol
     >>> test1 = sqlContext.createDataFrame([(Vectors.sparse(1, [0], [1.0]),)], ["features"])
     >>> model.transform(test1).head().prediction
     1.0
+    >>> model.numFeatures
+    1

     .. versionadded:: 1.4.0
     """
@@ -604,7 +612,7 @@ def getLossType(self):
         return self.getOrDefault(self.lossType)


-class GBTClassificationModel(TreeEnsembleModels):
+class GBTClassificationModel(HasNumFeaturesModel, TreeEnsembleModels):
     """
     Model fitted by GBTClassifier.

@@ -675,6 +683,8 @@ class NaiveBayes(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, H
     True
     >>> model.theta == model2.theta
     True
+    >>> model.numFeatures
+    2

     .. versionadded:: 1.5.0
     """
@@ -749,7 +759,7 @@ def getModelType(self):
         return self.getOrDefault(self.modelType)


-class NaiveBayesModel(JavaModel, JavaMLWritable, JavaMLReadable):
+class NaiveBayesModel(HasNumFeaturesModel, JavaModel, JavaMLWritable, JavaMLReadable):
     """
     Model fitted by NaiveBayes.
@@ -817,6 +827,8 @@ class MultilayerPerceptronClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol,
     True
     >>> model.weights == model2.weights
     True
+    >>> model.numFeatures
+    2

     .. versionadded:: 1.6.0
     """
@@ -894,7 +906,8 @@ def getBlockSize(self):
         return self.getOrDefault(self.blockSize)


-class MultilayerPerceptronClassificationModel(JavaModel, JavaMLWritable, JavaMLReadable):
+class MultilayerPerceptronClassificationModel(HasNumFeaturesModel, JavaModel, JavaMLWritable,
+                                              JavaMLReadable):
     """
     Model fitted by MultilayerPerceptronClassifier.

diff --git a/python/pyspark/ml/regression.py b/python/pyspark/ml/regression.py
index de8a5e4bed2e..68785671cd26 100644
--- a/python/pyspark/ml/regression.py
+++ b/python/pyspark/ml/regression.py
@@ -18,6 +18,7 @@
 import warnings

 from pyspark import since
+from pyspark.ml.base import HasNumFeaturesModel
 from pyspark.ml.param.shared import *
 from pyspark.ml.util import *
 from pyspark.ml.wrapper import JavaEstimator, JavaModel
@@ -80,6 +81,8 @@ class LinearRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPrediction
     True
     >>> model.intercept == model2.intercept
     True
+    >>> model.numFeatures
+    1

     .. versionadded:: 1.4.0
     """
@@ -118,7 +121,7 @@ def _create_model(self, java_model):
         return LinearRegressionModel(java_model)


-class LinearRegressionModel(JavaModel, JavaMLWritable, JavaMLReadable):
+class LinearRegressionModel(HasNumFeaturesModel, JavaModel, JavaMLWritable, JavaMLReadable):
     """
     Model fitted by LinearRegression.

@@ -425,6 +428,8 @@ class DecisionTreeRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredi
     True
     >>> model.depth == model2.depth
     True
+    >>> model.numFeatures
+    1

     .. versionadded:: 1.4.0
     """
@@ -510,7 +515,8 @@ def __repr__(self):


 @inherit_doc
-class DecisionTreeRegressionModel(DecisionTreeModel, JavaMLWritable, JavaMLReadable):
+class DecisionTreeRegressionModel(HasNumFeaturesModel, DecisionTreeModel, JavaMLWritable,
+                                  JavaMLReadable):
     """
     Model fitted by DecisionTreeRegressor.

@@ -564,6 +570,8 @@ class RandomForestRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredi
     >>> test1 = sqlContext.createDataFrame([(Vectors.sparse(1, [0], [1.0]),)], ["features"])
     >>> model.transform(test1).head().prediction
     0.5
+    >>> model.numFeatures
+    1

     .. versionadded:: 1.4.0
     """
@@ -613,7 +621,7 @@ def _create_model(self, java_model):
         return RandomForestRegressionModel(java_model)


-class RandomForestRegressionModel(TreeEnsembleModels):
+class RandomForestRegressionModel(HasNumFeaturesModel, TreeEnsembleModels):
     """
     Model fitted by RandomForestRegressor.

@@ -661,6 +669,8 @@ class GBTRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol,
     >>> test1 = sqlContext.createDataFrame([(Vectors.sparse(1, [0], [1.0]),)], ["features"])
     >>> model.transform(test1).head().prediction
     1.0
+    >>> model.numFeatures
+    1

     .. versionadded:: 1.4.0
     """
@@ -725,7 +735,7 @@ def getLossType(self):
         return self.getOrDefault(self.lossType)


-class GBTRegressionModel(TreeEnsembleModels):
+class GBTRegressionModel(HasNumFeaturesModel, TreeEnsembleModels):
     """
     Model fitted by GBTRegressor.
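
Usage sketch (reviewer note, not part of the patch): the snippet below illustrates how the new numFeatures getter is expected to behave once HasNumFeaturesModel is mixed in. It mirrors the doctest setup above and assumes an existing sqlContext; the estimator choice and expected output are illustrative, not a verified test.

    from pyspark.mllib.linalg import Vectors
    from pyspark.ml.classification import DecisionTreeClassifier

    # Two rows with a single feature, mirroring the DecisionTreeClassifier doctest data.
    df = sqlContext.createDataFrame(
        [(1.0, Vectors.dense(1.0)), (0.0, Vectors.sparse(1, [], []))],
        ["label", "features"])

    model = DecisionTreeClassifier(maxDepth=2).fit(df)

    # The mixin simply forwards to the wrapped JVM model via _call_java("numFeatures").
    print(model.numFeatures)   # expected: 1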