Skip to content
Closed
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
add missing method for ml.feature
  • Loading branch information
yanboliang committed Aug 19, 2015
commit 5c1c4453b0b5b3e87fdc569d92a98475ca07e459
25 changes: 25 additions & 0 deletions python/pyspark/ml/feature.py
Original file line number Diff line number Diff line change
Expand Up @@ -788,6 +788,8 @@ class StringIndexer(JavaEstimator, HasInputCol, HasOutputCol):
>>> sorted(set([(i[0], i[1]) for i in td.select(td.id, td.indexed).collect()]),
... key=lambda x: x[0])
[(0, 0.0), (1, 2.0), (2, 1.0), (3, 0.0), (4, 0.0), (5, 1.0)]
>>> model.labels
(u'a', u'c', u'b')
"""

@keyword_only
Expand Down Expand Up @@ -818,6 +820,13 @@ class StringIndexerModel(JavaModel):
Model fitted by StringIndexer.
"""

@property
def labels(self):
    """
    Labels in index order: the position of each label in the returned
    sequence corresponds to the index that will be assigned to it.

    The value is obtained by delegating to ``self._call_java("labels")``.
    """
    ordered_labels = self._call_java("labels")
    return ordered_labels


@inherit_doc
@ignore_unicode_prefix
Expand Down Expand Up @@ -1006,6 +1015,22 @@ class VectorIndexerModel(JavaModel):
Model fitted by VectorIndexer.
"""

@property
def numFeatures(self):
    """
    Length of the Vectors which this model transforms, i.e., the number
    of features.

    The value is obtained by delegating to ``self._call_java("numFeatures")``.
    """
    feature_count = self._call_java("numFeatures")
    return feature_count

@property
def categoryMaps(self):
    """
    Feature value index.

    Keys are categorical feature indices (column indices); each value is
    a map from the original feature values to 0-based category indices.
    A feature that does not appear in this map is treated as continuous.

    The value is obtained by delegating to
    ``self._call_java("javaCategoryMaps")``.
    """
    category_maps = self._call_java("javaCategoryMaps")
    return category_maps


@inherit_doc
@ignore_unicode_prefix
Expand Down