Skip to content
Closed
Prev Previous commit
Next Next commit
Update Python DataFrame show tests and add an empty DataFrame unit test.
  • Loading branch information
云峤 committed May 4, 2015
commit 84aec3ec56f9327a2fe08a053568f56fbeffa15a
105 changes: 69 additions & 36 deletions python/pyspark/sql/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,9 +275,12 @@ def show(self, n=20):
>>> df
DataFrame[age: int, name: string]
>>> df.show()
age name
2 Alice
5 Bob
+---+-----+
|age| name|
+---+-----+
| 2|Alice|
| 5| Bob|
+---+-----+
"""
print(self._jdf.showString(n))

Expand Down Expand Up @@ -591,12 +594,15 @@ def describe(self, *cols):
given, this function computes statistics for all numerical columns.

>>> df.describe().show()
summary age
count 2
mean 3.5
stddev 1.5
min 2
max 5
+-------+---+
|summary|age|
+-------+---+
| count| 2|
| mean|3.5|
| stddev|1.5|
| min| 2|
| max| 5|
+-------+---+
"""
jdf = self._jdf.describe(self._jseq(cols))
return DataFrame(jdf, self.sql_ctx)
Expand Down Expand Up @@ -801,12 +807,18 @@ def dropna(self, how='any', thresh=None, subset=None):
:param subset: optional list of column names to consider.

>>> df4.dropna().show()
age height name
10 80 Alice
+---+------+-----+
|age|height| name|
+---+------+-----+
| 10| 80|Alice|
+---+------+-----+

>>> df4.na.drop().show()
age height name
10 80 Alice
+---+------+-----+
|age|height| name|
+---+------+-----+
| 10| 80|Alice|
+---+------+-----+
"""
if how is not None and how not in ['any', 'all']:
raise ValueError("how ('" + how + "') should be 'any' or 'all'")
Expand Down Expand Up @@ -837,25 +849,34 @@ def fillna(self, value, subset=None):
then the non-string column is simply ignored.

>>> df4.fillna(50).show()
age height name
10 80 Alice
5 50 Bob
50 50 Tom
50 50 null
+---+------+-----+
|age|height| name|
+---+------+-----+
| 10| 80|Alice|
| 5| 50| Bob|
| 50| 50| Tom|
| 50| 50| null|
+---+------+-----+

>>> df4.fillna({'age': 50, 'name': 'unknown'}).show()
age height name
10 80 Alice
5 null Bob
50 null Tom
50 null unknown
+---+------+-------+
|age|height| name|
+---+------+-------+
| 10| 80| Alice|
| 5| null| Bob|
| 50| null| Tom|
| 50| null|unknown|
+---+------+-------+

>>> df4.na.fill({'age': 50, 'name': 'unknown'}).show()
age height name
10 80 Alice
5 null Bob
50 null Tom
50 null unknown
+---+------+-------+
|age|height| name|
+---+------+-------+
| 10| 80| Alice|
| 5| null| Bob|
| 50| null| Tom|
| 50| null|unknown|
+---+------+-------+
"""
if not isinstance(value, (float, int, long, basestring, dict)):
raise ValueError("value should be a float, int, long, string, or dict")
Expand Down Expand Up @@ -1220,11 +1241,17 @@ def getItem(self, key):

>>> df = sc.parallelize([([1, 2], {"key": "value"})]).toDF(["l", "d"])
>>> df.select(df.l.getItem(0), df.d.getItem("key")).show()
l[0] d[key]
1 value
+----+------+
|l[0]|d[key]|
+----+------+
| 1| value|
+----+------+
>>> df.select(df.l[0], df.d["key"]).show()
l[0] d[key]
1 value
+----+------+
|l[0]|d[key]|
+----+------+
| 1| value|
+----+------+
"""
return self[key]

Expand All @@ -1234,11 +1261,17 @@ def getField(self, name):
>>> from pyspark.sql import Row
>>> df = sc.parallelize([Row(r=Row(a=1, b="b"))]).toDF()
>>> df.select(df.r.getField("b")).show()
r.b
b
+---+
|r.b|
+---+
| b|
+---+
>>> df.select(df.r.a).show()
r.a
1
+---+
|r.a|
+---+
| 1|
+---+
"""
return Column(self._jc.getField(name))

Expand Down
16 changes: 14 additions & 2 deletions sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
Original file line number Diff line number Diff line change
Expand Up @@ -599,8 +599,20 @@ class DataFrameSuite extends QueryTest {
}

test("SPARK-7319 showString") {
assert(testData.select($"*").showString(1).split("\n") === Seq("+---+-----+",
"|key|value|", "+---+-----+", "| 1| 1|", "+---+-----+"))
assert(testData.select($"*").showString(1) === """+---+-----+
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's pretty awkward to have this at the end. I'd just create a variable for the expected string so we don't have so much whitespace.

||key|value|
|+---+-----+
|| 1| 1|
|+---+-----+
|""".stripMargin)
}

test("SPARK-7327 show with empty dataFrame") {
  // Expected rendering: the header row framed by borders, followed directly by
  // the closing border (no data rows) and a trailing newline.
  val expected =
    """+---+-----+
      ||key|value|
      |+---+-----+
      |+---+-----+
      |""".stripMargin

  // Restrict testData to rows with key < 0; the expected output above contains
  // no data rows for this selection.
  val filtered = testData.select($"*").filter($"key" < 0)

  assert(filtered.showString(1) === expected)
}

test("createDataFrame(RDD[Row], StructType) should convert UDTs (SPARK-6672)") {
Expand Down