diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py index d9ff356b9403..3e81249be462 100644 --- a/python/pyspark/ml/classification.py +++ b/python/pyspark/ml/classification.py @@ -264,7 +264,12 @@ def getFamily(self): return self.getOrDefault(self.family) -class LogisticRegressionModel(JavaModel, JavaClassificationModel, JavaMLWritable, JavaMLReadable): +class LogisticRegressionModel(JavaModel, JavaClassificationModel, HasFeaturesCol, + HasLabelCol, HasPredictionCol, HasMaxIter, + HasRegParam, HasTol, HasProbabilityCol, HasRawPredictionCol, + HasElasticNetParam, HasFitIntercept, HasStandardization, + HasThresholds, HasWeightCol, HasAggregationDepth, + JavaMLWritable, JavaMLReadable): """ Model fitted by LogisticRegression. @@ -669,8 +674,11 @@ def _create_model(self, java_model): @inherit_doc -class DecisionTreeClassificationModel(DecisionTreeModel, JavaClassificationModel, JavaMLWritable, - JavaMLReadable): +class DecisionTreeClassificationModel(DecisionTreeModel, JavaClassificationModel, HasFeaturesCol, + HasLabelCol, HasPredictionCol, HasProbabilityCol, + HasRawPredictionCol, DecisionTreeParams, + TreeClassifierParams, HasCheckpointInterval, HasSeed, + JavaMLWritable, JavaMLReadable): """ Model fitted by DecisionTreeClassifier. @@ -798,8 +806,9 @@ def _create_model(self, java_model): return RandomForestClassificationModel(java_model) -class RandomForestClassificationModel(TreeEnsembleModel, JavaClassificationModel, JavaMLWritable, - JavaMLReadable): +class RandomForestClassificationModel(TreeEnsembleModel, JavaClassificationModel, HasFeaturesCol, + HasLabelCol, HasPredictionCol, HasRawPredictionCol, + HasProbabilityCol, JavaMLWritable, JavaMLReadable): """ Model fitted by RandomForestClassifier. 
@@ -950,7 +959,8 @@ def getLossType(self): return self.getOrDefault(self.lossType) -class GBTClassificationModel(TreeEnsembleModel, JavaPredictionModel, JavaMLWritable, +class GBTClassificationModel(TreeEnsembleModel, JavaPredictionModel, HasFeaturesCol, + HasLabelCol, HasPredictionCol, JavaMLWritable, JavaMLReadable): """ Model fitted by GBTClassifier. @@ -1105,7 +1115,9 @@ def getModelType(self): return self.getOrDefault(self.modelType) -class NaiveBayesModel(JavaModel, JavaClassificationModel, JavaMLWritable, JavaMLReadable): +class NaiveBayesModel(JavaModel, JavaClassificationModel, HasFeaturesCol, HasLabelCol, + HasPredictionCol, HasProbabilityCol, HasRawPredictionCol, + JavaMLWritable, JavaMLReadable): """ Model fitted by NaiveBayes. @@ -1304,8 +1316,9 @@ def getInitialWeights(self): return self.getOrDefault(self.initialWeights) -class MultilayerPerceptronClassificationModel(JavaModel, JavaPredictionModel, JavaMLWritable, - JavaMLReadable): +class MultilayerPerceptronClassificationModel(JavaModel, JavaPredictionModel, + HasFeaturesCol, HasLabelCol, HasPredictionCol, + JavaMLWritable, JavaMLReadable): """ .. note:: Experimental diff --git a/python/pyspark/ml/clustering.py b/python/pyspark/ml/clustering.py index 7632f05c3b68..0b12573b247d 100644 --- a/python/pyspark/ml/clustering.py +++ b/python/pyspark/ml/clustering.py @@ -27,7 +27,8 @@ 'LDA', 'LDAModel', 'LocalLDAModel', 'DistributedLDAModel'] -class GaussianMixtureModel(JavaModel, JavaMLWritable, JavaMLReadable): +class GaussianMixtureModel(JavaModel, HasFeaturesCol, HasPredictionCol, HasMaxIter, HasTol, HasSeed, + HasProbabilityCol, JavaMLWritable, JavaMLReadable): """ .. note:: Experimental @@ -181,7 +182,8 @@ def getK(self): return self.getOrDefault(self.k) -class KMeansModel(JavaModel, JavaMLWritable, JavaMLReadable): +class KMeansModel(JavaModel, JavaMLWritable, JavaMLReadable, HasFeaturesCol, + HasPredictionCol, HasMaxIter, HasTol, HasSeed): """ Model fitted by KMeans. 
@@ -324,7 +326,8 @@ def getInitSteps(self): return self.getOrDefault(self.initSteps) -class BisectingKMeansModel(JavaModel, JavaMLWritable, JavaMLReadable): +class BisectingKMeansModel(JavaModel, HasFeaturesCol, HasPredictionCol, HasMaxIter, + HasSeed, JavaMLWritable, JavaMLReadable): """ .. note:: Experimental @@ -461,7 +464,7 @@ def _create_model(self, java_model): @inherit_doc -class LDAModel(JavaModel): +class LDAModel(JavaModel, HasFeaturesCol, HasMaxIter, HasSeed, HasCheckpointInterval): """ .. note:: Experimental diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py index 94afe82a3647..089b5f126c87 100755 --- a/python/pyspark/ml/feature.py +++ b/python/pyspark/ml/feature.py @@ -340,7 +340,8 @@ def _create_model(self, java_model): return CountVectorizerModel(java_model) -class CountVectorizerModel(JavaModel, JavaMLReadable, JavaMLWritable): +class CountVectorizerModel(JavaModel, HasInputCol, HasOutputCol, + JavaMLReadable, JavaMLWritable): """ Model fitted by :py:class:`CountVectorizer`. @@ -635,7 +636,7 @@ def _create_model(self, java_model): return IDFModel(java_model) -class IDFModel(JavaModel, JavaMLReadable, JavaMLWritable): +class IDFModel(JavaModel, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable): """ Model fitted by :py:class:`IDF`. @@ -713,7 +714,7 @@ def _create_model(self, java_model): return MaxAbsScalerModel(java_model) -class MaxAbsScalerModel(JavaModel, JavaMLReadable, JavaMLWritable): +class MaxAbsScalerModel(JavaModel, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable): """ .. note:: Experimental @@ -837,7 +838,7 @@ def _create_model(self, java_model): return MinMaxScalerModel(java_model) -class MinMaxScalerModel(JavaModel, JavaMLReadable, JavaMLWritable): +class MinMaxScalerModel(JavaModel, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable): """ Model fitted by :py:class:`MinMaxScaler`. 
@@ -1538,7 +1539,7 @@ def _create_model(self, java_model): return StandardScalerModel(java_model) -class StandardScalerModel(JavaModel, JavaMLReadable, JavaMLWritable): +class StandardScalerModel(JavaModel, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable): """ Model fitted by :py:class:`StandardScaler`. @@ -1626,7 +1627,8 @@ def _create_model(self, java_model): return StringIndexerModel(java_model) -class StringIndexerModel(JavaModel, JavaMLReadable, JavaMLWritable): +class StringIndexerModel(JavaModel, HasInputCol, HasOutputCol, HasHandleInvalid, + JavaMLReadable, JavaMLWritable): """ Model fitted by :py:class:`StringIndexer`. @@ -1996,7 +1998,7 @@ def _create_model(self, java_model): return VectorIndexerModel(java_model) -class VectorIndexerModel(JavaModel, JavaMLReadable, JavaMLWritable): +class VectorIndexerModel(JavaModel, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable): """ Model fitted by :py:class:`VectorIndexer`. @@ -2134,6 +2136,15 @@ class Word2Vec(JavaEstimator, HasStepSize, HasMaxIter, HasSeed, HasInputCol, Has >>> doc = spark.createDataFrame([(sent,), (sent,)], ["sentence"]) >>> word2Vec = Word2Vec(vectorSize=5, seed=42, inputCol="sentence", outputCol="model") >>> model = word2Vec.fit(doc) + >>> estimator_paramMap = word2Vec.extractParamMap() + >>> model_paramMap = model.extractParamMap() + >>> all([estimator_paramMap[getattr(word2Vec, param.name)] == value + ... 
for param, value in model_paramMap.items()]) + True + >>> all([param.parent == model.uid for param in model_paramMap]) + True + >>> [param.name for param in model.params] + ['inputCol', 'maxIter', 'outputCol', 'seed', 'stepSize'] >>> model.getVectors().show() +----+--------------------+ |word| vector| @@ -2292,7 +2303,8 @@ def _create_model(self, java_model): return Word2VecModel(java_model) -class Word2VecModel(JavaModel, JavaMLReadable, JavaMLWritable): +class Word2VecModel(JavaModel, HasStepSize, HasMaxIter, HasSeed, HasInputCol, + HasOutputCol, JavaMLReadable, JavaMLWritable): """ Model fitted by :py:class:`Word2Vec`. @@ -2333,6 +2345,15 @@ class PCA(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritab >>> df = spark.createDataFrame(data,["features"]) >>> pca = PCA(k=2, inputCol="features", outputCol="pca_features") >>> model = pca.fit(df) + >>> estimator_paramMap = pca.extractParamMap() + >>> model_paramMap = model.extractParamMap() + >>> all([estimator_paramMap[getattr(pca, param.name)] == value + ... for param, value in model_paramMap.items()]) + True + >>> all([param.parent == model.uid for param in model_paramMap]) + True + >>> [param.name for param in model.params] + ['inputCol', 'outputCol'] >>> model.transform(df).collect()[0].pca_features DenseVector([1.648..., -4.013...]) >>> model.explainedVariance @@ -2394,7 +2415,7 @@ def _create_model(self, java_model): return PCAModel(java_model) -class PCAModel(JavaModel, JavaMLReadable, JavaMLWritable): +class PCAModel(JavaModel, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable): """ Model fitted by :py:class:`PCA`. Transforms vectors to a lower dimensional space. @@ -2437,6 +2458,15 @@ class RFormula(JavaEstimator, HasFeaturesCol, HasLabelCol, JavaMLReadable, JavaM ... 
], ["y", "x", "s"]) >>> rf = RFormula(formula="y ~ x + s") >>> model = rf.fit(df) + >>> estimator_paramMap = rf.extractParamMap() + >>> model_paramMap = model.extractParamMap() + >>> all([estimator_paramMap[getattr(rf, param.name)] == value + ... for param, value in model_paramMap.items()]) + True + >>> all([param.parent == model.uid for param in model_paramMap]) + True + >>> [param.name for param in model.params] + ['featuresCol', 'labelCol'] >>> model.transform(df).show() +---+---+---+---------+-----+ | y| x| s| features|label| @@ -2554,7 +2584,7 @@ def __str__(self): return "RFormula(%s) (uid=%s)" % (formulaStr, self.uid) -class RFormulaModel(JavaModel, JavaMLReadable, JavaMLWritable): +class RFormulaModel(JavaModel, HasFeaturesCol, HasLabelCol, JavaMLReadable, JavaMLWritable): """ .. note:: Experimental @@ -2586,6 +2616,15 @@ class ChiSqSelector(JavaEstimator, HasFeaturesCol, HasOutputCol, HasLabelCol, Ja ... ["features", "label"]) >>> selector = ChiSqSelector(numTopFeatures=1, outputCol="selectedFeatures") >>> model = selector.fit(df) + >>> estimator_paramMap = selector.extractParamMap() + >>> model_paramMap = model.extractParamMap() + >>> all([estimator_paramMap[getattr(selector, param.name)] == value + ... for param, value in model_paramMap.items()]) + True + >>> all([param.parent == model.uid for param in model_paramMap]) + True + >>> [param.name for param in model.params] + ['featuresCol', 'labelCol', 'outputCol'] >>> model.transform(df).head().selectedFeatures DenseVector([18.0]) >>> model.selectedFeatures @@ -2710,7 +2749,8 @@ def _create_model(self, java_model): return ChiSqSelectorModel(java_model) -class ChiSqSelectorModel(JavaModel, JavaMLReadable, JavaMLWritable): +class ChiSqSelectorModel(JavaModel, HasFeaturesCol, HasOutputCol, HasLabelCol, + JavaMLReadable, JavaMLWritable): """ .. 
note:: Experimental diff --git a/python/pyspark/ml/param/__init__.py b/python/pyspark/ml/param/__init__.py index ade4864e1d78..98f0ac08c4d0 100644 --- a/python/pyspark/ml/param/__init__.py +++ b/python/pyspark/ml/param/__init__.py @@ -336,6 +336,11 @@ def hasParam(self, paramName): return isinstance(p, Param) else: raise TypeError("hasParam(): paramName must be a string") + try: + param = self._resolveParam(paramName) + return param in self.params + except Exception: + return False @since("1.4.0") def getOrDefault(self, param): diff --git a/python/pyspark/ml/recommendation.py b/python/pyspark/ml/recommendation.py index e28d38bd19f8..42dc149093af 100644 --- a/python/pyspark/ml/recommendation.py +++ b/python/pyspark/ml/recommendation.py @@ -26,8 +26,8 @@ @inherit_doc -class ALS(JavaEstimator, HasCheckpointInterval, HasMaxIter, HasPredictionCol, HasRegParam, HasSeed, - JavaMLWritable, JavaMLReadable): +class ALS(JavaEstimator, HasCheckpointInterval, HasMaxIter, HasPredictionCol, + HasRegParam, HasSeed, JavaMLWritable, JavaMLReadable): """ Alternating Least Squares (ALS) matrix factorization. @@ -333,7 +333,7 @@ def getFinalStorageLevel(self): return self.getOrDefault(self.finalStorageLevel) -class ALSModel(JavaModel, JavaMLWritable, JavaMLReadable): +class ALSModel(JavaModel, HasPredictionCol, JavaMLWritable, JavaMLReadable): """ Model fitted by ALS. 
diff --git a/python/pyspark/ml/regression.py b/python/pyspark/ml/regression.py index 9233d2e7e1a7..2a410c261291 100644 --- a/python/pyspark/ml/regression.py +++ b/python/pyspark/ml/regression.py @@ -129,7 +129,10 @@ def _create_model(self, java_model): return LinearRegressionModel(java_model) -class LinearRegressionModel(JavaModel, JavaPredictionModel, JavaMLWritable, JavaMLReadable): +class LinearRegressionModel(JavaModel, JavaPredictionModel, HasFeaturesCol, HasLabelCol, + HasPredictionCol, HasMaxIter, HasRegParam, HasTol, + HasElasticNetParam, HasFitIntercept, HasStandardization, + HasSolver, JavaMLWritable, JavaMLReadable): """ Model fitted by :class:`LinearRegression`. @@ -502,7 +505,9 @@ def getFeatureIndex(self): return self.getOrDefault(self.featureIndex) -class IsotonicRegressionModel(JavaModel, JavaMLWritable, JavaMLReadable): +class IsotonicRegressionModel(JavaModel, JavaMLWritable, JavaMLReadable, + HasFeaturesCol, HasLabelCol, HasPredictionCol, + HasWeightCol): """ Model fitted by :class:`IsotonicRegression`. @@ -560,6 +565,7 @@ class TreeRegressorParams(Params): """ supportedImpurities = ["variance"] + # a placeholder to make it appear in the generated doc impurity = Param(Params._dummy(), "impurity", "Criterion used for information gain calculation (case-insensitive). " + "Supported options: " + @@ -724,9 +730,9 @@ def _create_model(self, java_model): @inherit_doc -class DecisionTreeModel(JavaModel, JavaPredictionModel): - """ - Abstraction for Decision Tree models. +class DecisionTreeModel(JavaModel, JavaPredictionModel, + HasFeaturesCol, HasLabelCol, HasPredictionCol): + """Abstraction for Decision Tree models. .. 
versionadded:: 1.5.0 """ @@ -916,8 +922,9 @@ def _create_model(self, java_model): return RandomForestRegressionModel(java_model) -class RandomForestRegressionModel(TreeEnsembleModel, JavaPredictionModel, JavaMLWritable, - JavaMLReadable): +class RandomForestRegressionModel(TreeEnsembleModel, JavaPredictionModel, HasFeaturesCol, + HasLabelCol, HasPredictionCol, + JavaMLWritable, JavaMLReadable): """ Model fitted by :class:`RandomForestRegressor`. @@ -1057,7 +1064,9 @@ def getLossType(self): return self.getOrDefault(self.lossType) -class GBTRegressionModel(TreeEnsembleModel, JavaPredictionModel, JavaMLWritable, JavaMLReadable): +class GBTRegressionModel(TreeEnsembleModel, JavaPredictionModel, + HasFeaturesCol, HasLabelCol, + HasPredictionCol, JavaMLWritable, JavaMLReadable): """ Model fitted by :class:`GBTRegressor`. @@ -1231,7 +1240,9 @@ def getQuantilesCol(self): return self.getOrDefault(self.quantilesCol) -class AFTSurvivalRegressionModel(JavaModel, JavaMLWritable, JavaMLReadable): +class AFTSurvivalRegressionModel(JavaModel, HasFeaturesCol, HasLabelCol, + HasPredictionCol, HasFitIntercept, HasMaxIter, + HasTol, JavaMLWritable, JavaMLReadable): """ .. note:: Experimental @@ -1425,8 +1436,10 @@ def getLink(self): return self.getOrDefault(self.link) -class GeneralizedLinearRegressionModel(JavaModel, JavaPredictionModel, JavaMLWritable, - JavaMLReadable): +class GeneralizedLinearRegressionModel(JavaModel, JavaPredictionModel, HasLabelCol, HasFeaturesCol, + HasPredictionCol, HasFitIntercept, HasMaxIter, HasTol, + HasRegParam, HasWeightCol, HasSolver, + JavaMLWritable, JavaMLReadable): """ ..
note:: Experimental diff --git a/python/pyspark/ml/wrapper.py b/python/pyspark/ml/wrapper.py index 25c44b7533c7..4adfb3e99b96 100644 --- a/python/pyspark/ml/wrapper.py +++ b/python/pyspark/ml/wrapper.py @@ -19,8 +19,8 @@ from pyspark import SparkContext from pyspark.sql import DataFrame -from pyspark.ml import Estimator, Transformer, Model from pyspark.ml.param import Params +from pyspark.ml import Estimator, Transformer, Model from pyspark.ml.util import _jvm from pyspark.ml.common import inherit_doc, _java2py, _py2java @@ -138,9 +138,7 @@ def _to_java(self): """ Transfer this instance's Params to the wrapped Java object, and return the Java object. Used for ML persistence. - Meta-algorithms such as Pipeline should override this method. - :return: Java object equivalent to this instance. """ self._transfer_params_to_java() @@ -151,7 +149,6 @@ def _from_java(java_stage): """ Given a Java object, create and return a Python wrapper of it. Used for ML persistence. - Meta-algorithms such as Pipeline should override this method as a classmethod. """ def __get_class(clazz): @@ -200,7 +197,6 @@ def _create_model(self, java_model): def _fit_java(self, dataset): """ Fits a Java model to the input dataset. - :param dataset: input dataset, which is an instance of :py:class:`pyspark.sql.DataFrame` :param params: additional params (overwriting embedded values) @@ -211,7 +207,8 @@ def _fit_java(self, dataset): def _fit(self, dataset): java_model = self._fit_java(dataset) - return self._create_model(java_model) + model = self._create_model(java_model) + return self._copyValues(model) @inherit_doc