Skip to content

Commit 98c37a9

Browse files
committed
PyArrow 0.8.0 isn't published anymore, fix minor test errors
1 parent 0380ab4 commit 98c37a9

File tree

2 files changed

+5
-4
lines changed

2 files changed

+5
-4
lines changed

requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,6 @@ unittest2>=1.0.0
88
pandas>=0.13
99
spacy
1010
future
11-
pyarrow==0.8.0
11+
pyarrow==0.11.0
1212
flake8==3.5.0
1313
nltk

sparklingml/feature/python_pipelines.py

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -87,7 +87,7 @@ class SpacyTokenizeTransformer(Model, HasInputCol, HasOutputCol):
8787
... ["vals", "label"])
8888
>>> tr = SpacyTokenizeTransformer(inputCol="vals", outputCol="c")
8989
>>> str(tr.getLang())
90-
'en'
90+
'en_core_web_sm'
9191
>>> tr.transform(df).head().c
9292
[u'hi', u'boo']
9393
>>> from pyspark.ml import Pipeline
@@ -150,7 +150,7 @@ class SpacyAdvancedTokenizeTransformer(Model, HasInputCol, HasOutputCol):
150150
>>> df = spark.createDataFrame([("hi boo",), ("bye boo",)], ["vals"])
151151
>>> tr = SpacyAdvancedTokenizeTransformer(inputCol="vals", outputCol="c")
152152
>>> str(tr.getLang())
153-
'en'
153+
'en_core_web_sm'
154154
>>> tr.getSpacyFields()
155155
['_', 'ancestors', ...
156156
>>> tr.setSpacyFields(["text", "lang_"])
@@ -188,7 +188,8 @@ def __init__(self, lang=None,
188188
def setParams(self, lang="en_core_web_sm", spacyFields=None,
189189
inputCol=None, outputCol=None):
190190
"""
191-
setParams(self, lang="en_core_web_sm", SpacyAdvancedTokenize.default_fields,
191+
setParams(self, lang="en_core_web_sm",
192+
SpacyAdvancedTokenize.default_fields,
192193
inputCol=None, outputCol=None):
193194
"""
194195
kwargs = self._input_kwargs

0 commit comments

Comments
 (0)