Support an extended display mode for Dataset.show

apache · maropu · Apr 21, 2017 · Apr 26, 2017 · Apr 26, 2017 · Apr 24, 2017
commit ca8bfbd4f55962773b037c804f827d4f06d95cdd
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
@@ -194,6 +194,8 @@ setMethod("isLocal",
 #'                 20 characters will be truncated. However, if set greater than zero,
 #'                 truncates strings longer than \code{truncate} characters and all cells
 #'                 will be aligned right.
+#' @param extendedMode enable expanded table formatting mode to print a column data
+#'                     per line
 #' @param ... further arguments to be passed to or from other methods.
 #' @family SparkDataFrame functions
 #' @aliases showDF,SparkDataFrame-method
@@ -210,12 +212,13 @@ setMethod("isLocal",
 #' @note showDF since 1.4.0
 setMethod("showDF",
           signature(x = "SparkDataFrame"),
-          function(x, numRows = 20, truncate = TRUE) {
+          function(x, numRows = 20, truncate = TRUE, extendedMode = FALSE) {
             if (is.logical(truncate) && truncate) {
-              s <- callJMethod(x@sdf, "showString", numToInt(numRows), numToInt(20))
+              s <- callJMethod(x@sdf, "showString", numToInt(numRows), numToInt(20), extendedMode)
             } else {
               truncate2 <- as.numeric(truncate)
-              s <- callJMethod(x@sdf, "showString", numToInt(numRows), numToInt(truncate2))
+              s <- callJMethod(x@sdf, "showString", numToInt(numRows), numToInt(truncate2),
+                               extendedMode)
             }
             cat(s)
           })

diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
@@ -290,13 +290,15 @@ def isStreaming(self):
         return self._jdf.isStreaming()
 
     @since(1.3)
-    def show(self, n=20, truncate=True):
+    def show(self, n=20, truncate=True, extendedMode=False):
         """Prints the first ``n`` rows to the console.
 
         :param n: Number of rows to show.
         :param truncate: If set to True, truncate strings longer than 20 chars by default.
             If set to a number greater than one, truncates long strings to length ``truncate``
             and align cells right.
+        :param extendedMode: Enable expanded table formatting mode to print a column data
+            per line.
 
         >>> df
         DataFrame[age: int, name: string]
@@ -314,11 +316,18 @@ def show(self, n=20, truncate=True):
         |  2| Ali|
         |  5| Bob|
         +---+----+
+        >>> df.show(extendedMode=True)
+        -RECORD 0-----
+         age  | 2
+         name | Alice
+        -RECORD 1-----
+         age  | 5
+         name | Bob
         """
         if isinstance(truncate, bool) and truncate:
-            print(self._jdf.showString(n, 20))
+            print(self._jdf.showString(n, 20, extendedMode))
         else:
-            print(self._jdf.showString(n, int(truncate)))
+            print(self._jdf.showString(n, int(truncate), extendedMode))
 
     def __repr__(self):
         return "DataFrame[%s]" % (", ".join("%s: %s" % c for c in self.dtypes))

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
@@ -240,8 +240,10 @@ class Dataset[T] private[sql](
    * @param _numRows Number of rows to show
    * @param truncate If set to more than 0, truncates strings to `truncate` characters and
    *                   all cells will be aligned right.
+   * @param extendedMode Enable expanded table formatting mode to print a column data per line.
    */
-  private[sql] def showString(_numRows: Int, truncate: Int = 20): String = {
+  private[sql] def showString(
+      _numRows: Int, truncate: Int = 20, extendedMode: Boolean = false): String = {
     val numRows = _numRows.max(0)
     val takeResult = toDF().take(numRows + 1)
     val hasMoreData = takeResult.length > numRows
@@ -277,43 +279,73 @@ class Dataset[T] private[sql](
 
     val sb = new StringBuilder
     val numCols = schema.fieldNames.length
+    // We set a minimum column width at '3'
+    val minimumColWidth = 3
 
-    // Initialise the width of each column to a minimum value of '3'
-    val colWidths = Array.fill(numCols)(3)
+    if (!extendedMode) {
+      // Initialise the width of each column to a minimum value
+      val colWidths = Array.fill(numCols)(minimumColWidth)
 
-    // Compute the width of each column
-    for (row <- rows) {
-      for ((cell, i) <- row.zipWithIndex) {
-        colWidths(i) = math.max(colWidths(i), cell.length)
-      }
-    }
-
-    // Create SeparateLine
-    val sep: String = colWidths.map("-" * _).addString(sb, "+", "+", "+\n").toString()
-
-    // column names
-    rows.head.zipWithIndex.map { case (cell, i) =>
-      if (truncate > 0) {
-        StringUtils.leftPad(cell, colWidths(i))
-      } else {
-        StringUtils.rightPad(cell, colWidths(i))
+      // Compute the width of each column
+      for (row <- rows) {
+        for ((cell, i) <- row.zipWithIndex) {
+          colWidths(i) = math.max(colWidths(i), cell.length)
+        }
       }
-    }.addString(sb, "|", "|", "|\n")
 
-    sb.append(sep)
+      // Create SeparateLine
+      val sep: String = colWidths.map("-" * _).addString(sb, "+", "+", "+\n").toString()
 
-    // data
-    rows.tail.map {
-      _.zipWithIndex.map { case (cell, i) =>
+      // column names
+      rows.head.zipWithIndex.map { case (cell, i) =>
         if (truncate > 0) {
-          StringUtils.leftPad(cell.toString, colWidths(i))
+          StringUtils.leftPad(cell, colWidths(i))
         } else {
-          StringUtils.rightPad(cell.toString, colWidths(i))
+          StringUtils.rightPad(cell, colWidths(i))
         }
       }.addString(sb, "|", "|", "|\n")
-    }
 
-    sb.append(sep)
+      sb.append(sep)
+
+      // data
+      rows.tail.foreach {
+        _.zipWithIndex.map { case (cell, i) =>
+          if (truncate > 0) {
+            StringUtils.leftPad(cell.toString, colWidths(i))
+          } else {
+            StringUtils.rightPad(cell.toString, colWidths(i))
+          }
+        }.addString(sb, "|", "|", "|\n")
+      }
+
+      sb.append(sep)
+    } else {
+      // Extended display mode enabled
+      val fieldNames = rows.head
+      val dataRows = rows.tail
+
+      // Compute the width of field name and data columns
+      val fieldNameColWidth = fieldNames.foldLeft(minimumColWidth) { case (curMax, fieldName) =>
+        math.max(curMax, fieldName.length)
+      }
+      val dataColWidth = dataRows.foldLeft(minimumColWidth) { case (curMax, row) =>
+        math.max(curMax, row.map(_.length).reduceLeftOption[Int] { case (cellMax, cell) =>
+          math.max(cellMax, cell)
+        }.getOrElse(0))
+      }
+
+      dataRows.zipWithIndex.foreach { case (row, i) =>
+        // "+ 5" in size means a character length except for padded names and data
+        val rowHeader = StringUtils.rightPad(
+          s"-RECORD $i", fieldNameColWidth + dataColWidth + 5, "-")
+        sb.append(rowHeader).append("\n")
+        row.zipWithIndex.map { case (cell, j) =>
+          val fieldName = StringUtils.rightPad(fieldNames(j), fieldNameColWidth)
+          val data = StringUtils.rightPad(cell, dataColWidth)
+          s" $fieldName | $data "
+        }.addString(sb, "", "\n", "\n")
+      }
+    }
 
     // For Data that has more than "numRows" records
     if (hasMoreData) {
@@ -663,8 +695,54 @@ class Dataset[T] private[sql](
    * @group action
    * @since 1.6.0
    */
+  def show(numRows: Int, truncate: Int): Unit = show(numRows, truncate, extendedMode = false)
+
+  /**
+   * Displays the Dataset in a tabular form. For example:
+   * {{{
+   *   year  month AVG('Adj Close) MAX('Adj Close)
+   *   1980  12    0.503218        0.595103
+   *   1981  01    0.523289        0.570307
+   *   1982  02    0.436504        0.475256
+   *   1983  03    0.410516        0.442194
+   *   1984  04    0.450090        0.483521
+   * }}}
+   *
+   * If `extendedMode` enabled, this command prints a column dat per line:
+   * {{{
+   * -RECORD 0-----------------
+   *  c0  | 0.6988392500990668
+   *  c1  | 0.3035961718851606
+   *  c2  | 0.2446213804275899
+   *  c3  | 0.6132556607194246
+   *  c4  | 0.1904412430355646
+   *  c5  | 0.8856600775630444
+   * -RECORD 1-----------------
+   *  c0  | 0.3942727621020799
+   *  c1  | 0.6501707200059537
+   *  c2  | 0.2550059028276454
+   *  c3  | 0.9806662488156962
+   *  c4  | 0.8533897091838063
+   *  c5  | 0.3911189623246518
+   * -RECORD 2-----------------
+   *  c0  | 0.9024183805969801
+   *  c1  | 0.0242018765375147
+   *  c2  | 0.8508820250344251
+   *  c3  | 0.4593368817024575
+   *  c4  | 0.2216918145613194
+   *  c5  | 0.3756882647319614
+   * }}}
+   *
+   * @param numRows Number of rows to show
+   * @param truncate If set to more than 0, truncates strings to `truncate` characters and
+   *                    all cells will be aligned right.
+   * @param extendedMode Enable expanded table formatting mode to print a column data per line.
+   * @group action
+   * @since 2.3.0
+   */
   // scalastyle:off println
-  def show(numRows: Int, truncate: Int): Unit = println(showString(numRows, truncate))
+  def show(numRows: Int, truncate: Int, extendedMode: Boolean): Unit =
+    println(showString(numRows, truncate, extendedMode))
   // scalastyle:on println
 
   /**