Skip to content
Closed
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Previous commit
Include the changes made by SPARK-11569 (StringIndexer handling of NULL values)
Signed-off-by: VinceShieh <[email protected]>
  • Loading branch information
VinceShieh committed Mar 17, 2017
commit 1d2f28f2449691fe7efe3e7dfbfe577ad8a56c1f
10 changes: 5 additions & 5 deletions python/pyspark/ml/feature.py
Original file line number Diff line number Diff line change
@@ -1936,7 +1936,7 @@ class StringIndexer(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, Ja
... key=lambda x: x[0])
[(0, 'a'), (1, 'b'), (2, 'c'), (3, 'a'), (4, 'a'), (5, 'c')]
>>> testData2 = sc.parallelize([Row(id=0, label="a"), Row(id=1, label="d"),
... Row(id=2, label="e")], 2)
... Row(id=2, label=None)], 2)
>>> dfKeep= spark.createDataFrame(testData2)
>>> modelKeep = stringIndexer.setHandleInvalid("keep").fit(stringIndDf)
>>> tdK = modelKeep.transform(dfKeep)
@@ -1962,10 +1962,10 @@ class StringIndexer(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, Ja
.. versionadded:: 1.4.0
"""

handleInvalid = Param(Params._dummy(), "handleInvalid", "how to handle unseen labels. " +
"Options are 'skip' (filter out rows with unseen labels), " +
"error (throw an error), or 'keep' (put unseen labels in a special " +
"additional bucket, at index numLabels).",
handleInvalid = Param(Params._dummy(), "handleInvalid", "how to handle invalid data (unseen " +
"labels or NULL values). Options are 'skip' (filter out rows with " +
"invalid data), error (throw an error), or 'keep' (put invalid data " +
"in a special additional bucket, at index numLabels).",
typeConverter=TypeConverters.toString)

@keyword_only
Expand Down