Skip to content
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 24 additions & 5 deletions python/pyspark/ml/feature.py
Original file line number Diff line number Diff line change
Expand Up @@ -2234,28 +2234,33 @@ class Word2Vec(JavaEstimator, HasStepSize, HasMaxIter, HasSeed, HasInputCol, Has
windowSize = Param(Params._dummy(), "windowSize",
"the window size (context words from [-window, window]). Default value is 5",
typeConverter=TypeConverters.toInt)
maxSentenceLength = Param(Params._dummy(), "maxSentenceLength",
"Maximum length (in words) of each sentence in the input data. " +
"Any sentence longer than this threshold will " +
"be divided into chunks up to the size.",
typeConverter=TypeConverters.toInt)

@keyword_only
def __init__(self, vectorSize=100, minCount=5, numPartitions=1, stepSize=0.025, maxIter=1,
seed=None, inputCol=None, outputCol=None, windowSize=5):
seed=None, inputCol=None, outputCol=None, windowSize=5, maxSentenceLength=1000):
"""
__init__(self, vectorSize=100, minCount=5, numPartitions=1, stepSize=0.025, maxIter=1, \
seed=None, inputCol=None, outputCol=None, windowSize=5)
seed=None, inputCol=None, outputCol=None, windowSize=5, maxSentenceLength=1000)
"""
super(Word2Vec, self).__init__()
self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.Word2Vec", self.uid)
self._setDefault(vectorSize=100, minCount=5, numPartitions=1, stepSize=0.025, maxIter=1,
seed=None, windowSize=5)
seed=None, windowSize=5, maxSentenceLength=1000)
kwargs = self.__init__._input_kwargs
self.setParams(**kwargs)

@keyword_only
@since("1.4.0")
def setParams(self, vectorSize=100, minCount=5, numPartitions=1, stepSize=0.025, maxIter=1,
seed=None, inputCol=None, outputCol=None, windowSize=5):
seed=None, inputCol=None, outputCol=None, windowSize=5, maxSentenceLength=1000):
"""
setParams(self, minCount=5, numPartitions=1, stepSize=0.025, maxIter=1, seed=None, \
inputCol=None, outputCol=None, windowSize=5)
inputCol=None, outputCol=None, windowSize=5, maxSentenceLength=1000)
Sets params for this Word2Vec.
"""
kwargs = self.setParams._input_kwargs
Expand Down Expand Up @@ -2317,6 +2322,20 @@ def getWindowSize(self):
"""
return self.getOrDefault(self.windowSize)

@since("2.0.0")
def setMaxSentenceLength(self, value):
    """
    Sets the value of :py:attr:`maxSentenceLength`.

    Per the param description, this is the maximum length (in words) of
    each sentence in the input data; longer sentences are divided into
    chunks up to this size.

    :param value: new value for :py:attr:`maxSentenceLength`.
    :return: whatever ``self._set`` returns.
    """
    updated = self._set(maxSentenceLength=value)
    return updated

@since("2.0.0")
def getMaxSentenceLength(self):
    """
    Gets the value of maxSentenceLength or its default value.

    :return: the result of ``self.getOrDefault`` for the
             ``maxSentenceLength`` param.
    """
    param = self.maxSentenceLength
    return self.getOrDefault(param)

def _create_model(self, java_model):
    """
    Builds a :py:class:`Word2VecModel` from ``java_model``.

    :param java_model: object passed unchanged to the
                       :py:class:`Word2VecModel` constructor.
    :return: the newly constructed :py:class:`Word2VecModel`.
    """
    model = Word2VecModel(java_model)
    return model

Expand Down