@@ -87,7 +87,7 @@ class SpacyTokenizeTransformer(Model, HasInputCol, HasOutputCol):
8787 ... ["vals", "label"])
8888 >>> tr = SpacyTokenizeTransformer(inputCol="vals", outputCol="c")
8989 >>> str(tr.getLang())
90- 'en'
90+ 'en_core_web_sm'
9191 >>> tr.transform(df).head().c
9292 [u'hi', u'boo']
9393 >>> from pyspark.ml import Pipeline
@@ -150,7 +150,7 @@ class SpacyAdvancedTokenizeTransformer(Model, HasInputCol, HasOutputCol):
150150 >>> df = spark.createDataFrame([("hi boo",), ("bye boo",)], ["vals"])
151151 >>> tr = SpacyAdvancedTokenizeTransformer(inputCol="vals", outputCol="c")
152152 >>> str(tr.getLang())
153- 'en'
153+ 'en_core_web_sm'
154154 >>> tr.getSpacyFields()
155155 ['_', 'ancestors', ...
156156 >>> tr.setSpacyFields(["text", "lang_"])
@@ -188,7 +188,8 @@ def __init__(self, lang=None,
188188 def setParams(self, lang="en_core_web_sm", spacyFields=None,
189189 inputCol=None, outputCol=None):
190190 """
191- setParams(self, lang="en_core_web_sm", SpacyAdvancedTokenize.default_fields,
191+ setParams(self, lang="en_core_web_sm",
192+ SpacyAdvancedTokenize.default_fields,
192193 inputCol=None, outputCol=None):
193194 """
194195 kwargs = self._input_kwargs
0 commit comments