Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions R/pkg/R/DataFrame.R
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,7 @@ setMethod("isLocal",
#' 20 characters will be truncated. However, if set greater than zero,
#' truncates strings longer than \code{truncate} characters and all cells
#' will be aligned right.
#' @param vertical whether to print output rows vertically (one line per column value).
#' @param ... further arguments to be passed to or from other methods.
#' @family SparkDataFrame functions
#' @aliases showDF,SparkDataFrame-method
Expand All @@ -210,12 +211,13 @@ setMethod("isLocal",
#' @note showDF since 1.4.0
setMethod("showDF",
signature(x = "SparkDataFrame"),
function(x, numRows = 20, truncate = TRUE) {
function(x, numRows = 20, truncate = TRUE, vertical = FALSE) {
if (is.logical(truncate) && truncate) {
s <- callJMethod(x@sdf, "showString", numToInt(numRows), numToInt(20))
s <- callJMethod(x@sdf, "showString", numToInt(numRows), numToInt(20), vertical)
} else {
truncate2 <- as.numeric(truncate)
s <- callJMethod(x@sdf, "showString", numToInt(numRows), numToInt(truncate2))
s <- callJMethod(x@sdf, "showString", numToInt(numRows), numToInt(truncate2),
vertical)
}
cat(s)
})
Expand Down
15 changes: 12 additions & 3 deletions python/pyspark/sql/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -290,13 +290,15 @@ def isStreaming(self):
return self._jdf.isStreaming()

@since(1.3)
def show(self, n=20, truncate=True):
def show(self, n=20, truncate=True, vertical=False):
"""Prints the first ``n`` rows to the console.

:param n: Number of rows to show.
:param truncate: If set to True, truncate strings longer than 20 chars by default.
If set to a number greater than one, truncates long strings to length ``truncate``
and align cells right.
:param vertical: If set to True, print output rows vertically (one line
per column value).

>>> df
DataFrame[age: int, name: string]
Expand All @@ -314,11 +316,18 @@ def show(self, n=20, truncate=True):
| 2| Ali|
| 5| Bob|
+---+----+
>>> df.show(vertical=True)
-RECORD 0-----
age | 2
name | Alice
-RECORD 1-----
age | 5
name | Bob
"""
if isinstance(truncate, bool) and truncate:
print(self._jdf.showString(n, 20))
print(self._jdf.showString(n, 20, vertical))
else:
print(self._jdf.showString(n, int(truncate)))
print(self._jdf.showString(n, int(truncate), vertical))

def __repr__(self):
return "DataFrame[%s]" % (", ".join("%s: %s" % c for c in self.dtypes))
Expand Down
149 changes: 118 additions & 31 deletions sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
Original file line number Diff line number Diff line change
Expand Up @@ -240,8 +240,10 @@ class Dataset[T] private[sql](
* @param _numRows Number of rows to show
* @param truncate If set to more than 0, truncates strings to `truncate` characters and
* all cells will be aligned right.
* @param vertical If set to true, prints output rows vertically (one line per column value).
*/
private[sql] def showString(_numRows: Int, truncate: Int = 20): String = {
private[sql] def showString(
_numRows: Int, truncate: Int = 20, vertical: Boolean = false): String = {
val numRows = _numRows.max(0)
val takeResult = toDF().take(numRows + 1)
val hasMoreData = takeResult.length > numRows
Expand Down Expand Up @@ -277,46 +279,80 @@ class Dataset[T] private[sql](

val sb = new StringBuilder
val numCols = schema.fieldNames.length
// We set a minimum column width at '3'
val minimumColWidth = 3

// Initialise the width of each column to a minimum value of '3'
val colWidths = Array.fill(numCols)(3)
if (!vertical) {
// Initialise the width of each column to a minimum value
val colWidths = Array.fill(numCols)(minimumColWidth)

// Compute the width of each column
for (row <- rows) {
for ((cell, i) <- row.zipWithIndex) {
colWidths(i) = math.max(colWidths(i), cell.length)
}
}

// Create SeparateLine
val sep: String = colWidths.map("-" * _).addString(sb, "+", "+", "+\n").toString()

// column names
rows.head.zipWithIndex.map { case (cell, i) =>
if (truncate > 0) {
StringUtils.leftPad(cell, colWidths(i))
} else {
StringUtils.rightPad(cell, colWidths(i))
// Compute the width of each column
for (row <- rows) {
for ((cell, i) <- row.zipWithIndex) {
colWidths(i) = math.max(colWidths(i), cell.length)
}
}
}.addString(sb, "|", "|", "|\n")

sb.append(sep)
// Create SeparateLine
val sep: String = colWidths.map("-" * _).addString(sb, "+", "+", "+\n").toString()

// data
rows.tail.map {
_.zipWithIndex.map { case (cell, i) =>
// column names
rows.head.zipWithIndex.map { case (cell, i) =>
if (truncate > 0) {
StringUtils.leftPad(cell.toString, colWidths(i))
StringUtils.leftPad(cell, colWidths(i))
} else {
StringUtils.rightPad(cell.toString, colWidths(i))
StringUtils.rightPad(cell, colWidths(i))
}
}.addString(sb, "|", "|", "|\n")
}

sb.append(sep)
sb.append(sep)

// data
rows.tail.foreach {
_.zipWithIndex.map { case (cell, i) =>
if (truncate > 0) {
StringUtils.leftPad(cell.toString, colWidths(i))
} else {
StringUtils.rightPad(cell.toString, colWidths(i))
}
}.addString(sb, "|", "|", "|\n")
}

sb.append(sep)
} else {
// Extended display mode enabled
val fieldNames = rows.head
val dataRows = rows.tail

// Compute the width of field name and data columns
val fieldNameColWidth = fieldNames.foldLeft(minimumColWidth) { case (curMax, fieldName) =>
math.max(curMax, fieldName.length)
}
val dataColWidth = dataRows.foldLeft(minimumColWidth) { case (curMax, row) =>
math.max(curMax, row.map(_.length).reduceLeftOption[Int] { case (cellMax, cell) =>
math.max(cellMax, cell)
}.getOrElse(0))
}

dataRows.zipWithIndex.foreach { case (row, i) =>
Copy link
Member

@gatorsmile gatorsmile Apr 26, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Like the non-vertical output, when no row exists, we at least need to output the column names?

df.limit(0).show(20, 0, true)

Copy link
Member Author

@maropu maropu Apr 26, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I checked and found that both pg and mysql output no column names in this mode:

// pg
postgres=# create table t(a INT, b TEXT);
CREATE TABLE
postgres=# select * from t;
 a | b 
---+---
(0 rows)

postgres=# \x
Expanded display is on.
postgres=# select * from t;
(0 rows)


// mysql
mysql -u root --vertical

mysql> create table t(a INT, b TEXT);
Query OK, 0 rows affected (0.04 sec)

mysql> select * from t;
Empty set (0.00 sec)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now, in this PR, we output nothing in this case. Postgres and MySQL at least output the message to indicate the result set is empty.

cc @cloud-fan @sameeragarwal @hvanhovell @rxin Any suggestion here?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Aha, I see. I'll update. Thanks!

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

+1 for indicating that result set is empty. About displaying column names if the output is empty, perhaps it'd be best to stick with the postgres/mysql semantics.

// "+ 5" accounts for the characters on each line besides the padded name and data:
// the leading space, the " | " separator, and the trailing space
val rowHeader = StringUtils.rightPad(
s"-RECORD $i", fieldNameColWidth + dataColWidth + 5, "-")
sb.append(rowHeader).append("\n")
row.zipWithIndex.map { case (cell, j) =>
val fieldName = StringUtils.rightPad(fieldNames(j), fieldNameColWidth)
val data = StringUtils.rightPad(cell, dataColWidth)
s" $fieldName | $data "
}.addString(sb, "", "\n", "\n")
}
}

// For Data that has more than "numRows" records
if (hasMoreData) {
// Print a footer
if (vertical && data.isEmpty) {
// In a vertical mode, print an empty row set explicitly
sb.append("(0 rows)\n")
} else if (hasMoreData) {
// For Data that has more than "numRows" records
val rowsString = if (numRows == 1) "row" else "rows"
sb.append(s"only showing top $numRows $rowsString\n")
}
Expand Down Expand Up @@ -663,8 +699,59 @@ class Dataset[T] private[sql](
* @group action
* @since 1.6.0
*/
def show(numRows: Int, truncate: Int): Unit = show(numRows, truncate, vertical = false)

/**
* Displays the Dataset in a tabular form. For example:
* {{{
* year month AVG('Adj Close) MAX('Adj Close)
* 1980 12 0.503218 0.595103
* 1981 01 0.523289 0.570307
* 1982 02 0.436504 0.475256
* 1983 03 0.410516 0.442194
* 1984 04 0.450090 0.483521
* }}}
*
* If `vertical` is enabled, this command prints output rows vertically (one line per column value):
*
* {{{
* -RECORD 0-------------------
* year | 1980
* month | 12
* AVG('Adj Close) | 0.503218
* MAX('Adj Close) | 0.595103
* -RECORD 1-------------------
* year | 1981
* month | 01
* AVG('Adj Close) | 0.523289
* MAX('Adj Close) | 0.570307
* -RECORD 2-------------------
* year | 1982
* month | 02
* AVG('Adj Close) | 0.436504
* MAX('Adj Close) | 0.475256
* -RECORD 3-------------------
* year | 1983
* month | 03
* AVG('Adj Close) | 0.410516
* MAX('Adj Close) | 0.442194
* -RECORD 4-------------------
* year | 1984
* month | 04
* AVG('Adj Close) | 0.450090
* MAX('Adj Close) | 0.483521
* }}}
*
* @param numRows Number of rows to show
* @param truncate If set to more than 0, truncates strings to `truncate` characters and
* all cells will be aligned right.
* @param vertical If set to true, prints output rows vertically (one line per column value).
* @group action
* @since 2.3.0
*/
// scalastyle:off println
def show(numRows: Int, truncate: Int): Unit = println(showString(numRows, truncate))
def show(numRows: Int, truncate: Int, vertical: Boolean): Unit =
println(showString(numRows, truncate, vertical))
// scalastyle:on println

/**
Expand Down
Loading