Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
Support an extended display mode for Dataset.show
  • Loading branch information
maropu committed Apr 24, 2017
commit ca8bfbd4f55962773b037c804f827d4f06d95cdd
9 changes: 6 additions & 3 deletions R/pkg/R/DataFrame.R
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,8 @@ setMethod("isLocal",
#' 20 characters will be truncated. However, if set greater than zero,
#' truncates strings longer than \code{truncate} characters and all cells
#' will be aligned right.
#' @param extendedMode enable expanded table formatting mode to print one column value
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

extendedMode -> vertical?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, SGTM. I'll update. Thanks!

#' per line
#' @param ... further arguments to be passed to or from other methods.
#' @family SparkDataFrame functions
#' @aliases showDF,SparkDataFrame-method
Expand All @@ -210,12 +212,13 @@ setMethod("isLocal",
#' @note showDF since 1.4.0
setMethod("showDF",
signature(x = "SparkDataFrame"),
function(x, numRows = 20, truncate = TRUE) {
function(x, numRows = 20, truncate = TRUE, extendedMode = FALSE) {
if (is.logical(truncate) && truncate) {
s <- callJMethod(x@sdf, "showString", numToInt(numRows), numToInt(20))
s <- callJMethod(x@sdf, "showString", numToInt(numRows), numToInt(20), extendedMode)
} else {
truncate2 <- as.numeric(truncate)
s <- callJMethod(x@sdf, "showString", numToInt(numRows), numToInt(truncate2))
s <- callJMethod(x@sdf, "showString", numToInt(numRows), numToInt(truncate2),
extendedMode)
}
cat(s)
})
Expand Down
15 changes: 12 additions & 3 deletions python/pyspark/sql/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -290,13 +290,15 @@ def isStreaming(self):
return self._jdf.isStreaming()

@since(1.3)
def show(self, n=20, truncate=True):
def show(self, n=20, truncate=True, extendedMode=False):
"""Prints the first ``n`` rows to the console.

:param n: Number of rows to show.
:param truncate: If set to True, truncate strings longer than 20 chars by default.
If set to a number greater than one, truncates long strings to length ``truncate``
and align cells right.
:param extendedMode: If set to True, enable expanded table formatting mode, which prints
one column value per line.

>>> df
DataFrame[age: int, name: string]
Expand All @@ -314,11 +316,18 @@ def show(self, n=20, truncate=True):
| 2| Ali|
| 5| Bob|
+---+----+
>>> df.show(extendedMode=True)
-RECORD 0-----
age | 2
name | Alice
-RECORD 1-----
age | 5
name | Bob
"""
if isinstance(truncate, bool) and truncate:
print(self._jdf.showString(n, 20))
print(self._jdf.showString(n, 20, extendedMode))
else:
print(self._jdf.showString(n, int(truncate)))
print(self._jdf.showString(n, int(truncate), extendedMode))

def __repr__(self):
return "DataFrame[%s]" % (", ".join("%s: %s" % c for c in self.dtypes))
Expand Down
136 changes: 107 additions & 29 deletions sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
Original file line number Diff line number Diff line change
Expand Up @@ -240,8 +240,10 @@ class Dataset[T] private[sql](
* @param _numRows Number of rows to show
* @param truncate If set to more than 0, truncates strings to `truncate` characters and
* all cells will be aligned right.
* @param extendedMode If true, prints each row vertically (one column value per line).
*/
private[sql] def showString(_numRows: Int, truncate: Int = 20): String = {
private[sql] def showString(
_numRows: Int, truncate: Int = 20, extendedMode: Boolean = false): String = {
val numRows = _numRows.max(0)
val takeResult = toDF().take(numRows + 1)
val hasMoreData = takeResult.length > numRows
Expand Down Expand Up @@ -277,43 +279,73 @@ class Dataset[T] private[sql](

val sb = new StringBuilder
val numCols = schema.fieldNames.length
// We set a minimum column width at '3'
val minimumColWidth = 3

// Initialise the width of each column to a minimum value of '3'
val colWidths = Array.fill(numCols)(3)
if (!extendedMode) {
// Initialise the width of each column to a minimum value
val colWidths = Array.fill(numCols)(minimumColWidth)

// Compute the width of each column
for (row <- rows) {
for ((cell, i) <- row.zipWithIndex) {
colWidths(i) = math.max(colWidths(i), cell.length)
}
}

// Create SeparateLine
val sep: String = colWidths.map("-" * _).addString(sb, "+", "+", "+\n").toString()

// column names
rows.head.zipWithIndex.map { case (cell, i) =>
if (truncate > 0) {
StringUtils.leftPad(cell, colWidths(i))
} else {
StringUtils.rightPad(cell, colWidths(i))
// Compute the width of each column
for (row <- rows) {
for ((cell, i) <- row.zipWithIndex) {
colWidths(i) = math.max(colWidths(i), cell.length)
}
}
}.addString(sb, "|", "|", "|\n")

sb.append(sep)
// Create SeparateLine
val sep: String = colWidths.map("-" * _).addString(sb, "+", "+", "+\n").toString()

// data
rows.tail.map {
_.zipWithIndex.map { case (cell, i) =>
// column names
rows.head.zipWithIndex.map { case (cell, i) =>
if (truncate > 0) {
StringUtils.leftPad(cell.toString, colWidths(i))
StringUtils.leftPad(cell, colWidths(i))
} else {
StringUtils.rightPad(cell.toString, colWidths(i))
StringUtils.rightPad(cell, colWidths(i))
}
}.addString(sb, "|", "|", "|\n")
}

sb.append(sep)
sb.append(sep)

// data
rows.tail.foreach {
_.zipWithIndex.map { case (cell, i) =>
if (truncate > 0) {
StringUtils.leftPad(cell.toString, colWidths(i))
} else {
StringUtils.rightPad(cell.toString, colWidths(i))
}
}.addString(sb, "|", "|", "|\n")
}

sb.append(sep)
} else {
// Extended display mode enabled
val fieldNames = rows.head
val dataRows = rows.tail

// Compute the width of field name and data columns
val fieldNameColWidth = fieldNames.foldLeft(minimumColWidth) { case (curMax, fieldName) =>
math.max(curMax, fieldName.length)
}
val dataColWidth = dataRows.foldLeft(minimumColWidth) { case (curMax, row) =>
math.max(curMax, row.map(_.length).reduceLeftOption[Int] { case (cellMax, cell) =>
math.max(cellMax, cell)
}.getOrElse(0))
}

dataRows.zipWithIndex.foreach { case (row, i) =>
Copy link
Member

@gatorsmile gatorsmile Apr 26, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Like the non-vertical output, when no row exists, we at least need to output the column names?

df.limit(0).show(20, 0, true)

Copy link
Member Author

@maropu maropu Apr 26, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I checked and found that both pg and mysql output no column name in the mode;

// pg
postgres=# create table t(a INT, b TEXT);
CREATE TABLE
postgres=# select * from t;
 a | b 
---+---
(0 rows)

postgres=# \x
Expanded display is on.
postgres=# select * from t;
(0 rows)


// mysql
mysql -u root --vertical

mysql> create table t(a INT, b TEXT);
Query OK, 0 rows affected (0.04 sec)

mysql> select * from t;
Empty set (0.00 sec)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now, in this PR, we output nothing in this case. Postgres and MySQL at least output the message to indicate the result set is empty.

cc @cloud-fan @sameeragarwal @hvanhovell @rxin Any suggestion here?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Aha, I see. I'll update. Thanks!

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

+1 for indicating that result set is empty. About displaying column names if the output is empty, perhaps it'd be best to stick with the postgres/mysql semantics.

// "+ 5" covers the non-padded characters of each line: two outer spaces and " | "
val rowHeader = StringUtils.rightPad(
s"-RECORD $i", fieldNameColWidth + dataColWidth + 5, "-")
sb.append(rowHeader).append("\n")
row.zipWithIndex.map { case (cell, j) =>
val fieldName = StringUtils.rightPad(fieldNames(j), fieldNameColWidth)
val data = StringUtils.rightPad(cell, dataColWidth)
s" $fieldName | $data "
}.addString(sb, "", "\n", "\n")
}
}

// For Data that has more than "numRows" records
if (hasMoreData) {
Expand Down Expand Up @@ -663,8 +695,54 @@ class Dataset[T] private[sql](
* @group action
* @since 1.6.0
*/
def show(numRows: Int, truncate: Int): Unit = show(numRows, truncate, extendedMode = false)

/**
* Displays the Dataset in a tabular form. For example:
* {{{
* year month AVG('Adj Close) MAX('Adj Close)
* 1980 12 0.503218 0.595103
* 1981 01 0.523289 0.570307
* 1982 02 0.436504 0.475256
* 1983 03 0.410516 0.442194
* 1984 04 0.450090 0.483521
* }}}
*
* If `extendedMode` is enabled, this command prints one column value per line:
* {{{
* -RECORD 0-----------------
* c0 | 0.6988392500990668
* c1 | 0.3035961718851606
* c2 | 0.2446213804275899
* c3 | 0.6132556607194246
* c4 | 0.1904412430355646
* c5 | 0.8856600775630444
* -RECORD 1-----------------
* c0 | 0.3942727621020799
* c1 | 0.6501707200059537
* c2 | 0.2550059028276454
* c3 | 0.9806662488156962
* c4 | 0.8533897091838063
* c5 | 0.3911189623246518
* -RECORD 2-----------------
* c0 | 0.9024183805969801
* c1 | 0.0242018765375147
* c2 | 0.8508820250344251
* c3 | 0.4593368817024575
* c4 | 0.2216918145613194
* c5 | 0.3756882647319614
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you use the same example, as shown above?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok

* }}}
*
* @param numRows Number of rows to show
* @param truncate If set to more than 0, truncates strings to `truncate` characters and
* all cells will be aligned right.
* @param extendedMode If true, prints each row vertically (one column value per line).
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe we can follow what MySQL documents.

@param vertical Displays the result vertically.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This one? https://dev.mysql.com/doc/refman/5.7/en/mysql-command-options.html
Print query output rows vertically (one line per column value)?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes.

* @group action
* @since 2.3.0
*/
// scalastyle:off println
def show(numRows: Int, truncate: Int): Unit = println(showString(numRows, truncate))
def show(numRows: Int, truncate: Int, extendedMode: Boolean): Unit =
println(showString(numRows, truncate, extendedMode))
// scalastyle:on println

/**
Expand Down
Loading