-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-20425][SQL] Support a vertical display mode for Dataset.show #17733
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
- Loading branch information
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -240,8 +240,10 @@ class Dataset[T] private[sql]( | |
| * @param _numRows Number of rows to show | ||
| * @param truncate If set to more than 0, truncates strings to `truncate` characters and | ||
| * all cells will be aligned right. | ||
| * @param extendedMode Enable the expanded table formatting mode, which prints each column value on its own line. | ||
| */ | ||
| private[sql] def showString(_numRows: Int, truncate: Int = 20): String = { | ||
| private[sql] def showString( | ||
| _numRows: Int, truncate: Int = 20, extendedMode: Boolean = false): String = { | ||
| val numRows = _numRows.max(0) | ||
| val takeResult = toDF().take(numRows + 1) | ||
| val hasMoreData = takeResult.length > numRows | ||
|
|
@@ -277,43 +279,73 @@ class Dataset[T] private[sql]( | |
|
|
||
| val sb = new StringBuilder | ||
| val numCols = schema.fieldNames.length | ||
| // We set a minimum column width at '3' | ||
| val minimumColWidth = 3 | ||
|
|
||
| // Initialise the width of each column to a minimum value of '3' | ||
| val colWidths = Array.fill(numCols)(3) | ||
| if (!extendedMode) { | ||
| // Initialise the width of each column to a minimum value | ||
| val colWidths = Array.fill(numCols)(minimumColWidth) | ||
|
|
||
| // Compute the width of each column | ||
| for (row <- rows) { | ||
| for ((cell, i) <- row.zipWithIndex) { | ||
| colWidths(i) = math.max(colWidths(i), cell.length) | ||
| } | ||
| } | ||
|
|
||
| // Create SeparateLine | ||
| val sep: String = colWidths.map("-" * _).addString(sb, "+", "+", "+\n").toString() | ||
|
|
||
| // column names | ||
| rows.head.zipWithIndex.map { case (cell, i) => | ||
| if (truncate > 0) { | ||
| StringUtils.leftPad(cell, colWidths(i)) | ||
| } else { | ||
| StringUtils.rightPad(cell, colWidths(i)) | ||
| // Compute the width of each column | ||
| for (row <- rows) { | ||
| for ((cell, i) <- row.zipWithIndex) { | ||
| colWidths(i) = math.max(colWidths(i), cell.length) | ||
| } | ||
| } | ||
| }.addString(sb, "|", "|", "|\n") | ||
|
|
||
| sb.append(sep) | ||
| // Create SeparateLine | ||
| val sep: String = colWidths.map("-" * _).addString(sb, "+", "+", "+\n").toString() | ||
|
|
||
| // data | ||
| rows.tail.map { | ||
| _.zipWithIndex.map { case (cell, i) => | ||
| // column names | ||
| rows.head.zipWithIndex.map { case (cell, i) => | ||
| if (truncate > 0) { | ||
| StringUtils.leftPad(cell.toString, colWidths(i)) | ||
| StringUtils.leftPad(cell, colWidths(i)) | ||
| } else { | ||
| StringUtils.rightPad(cell.toString, colWidths(i)) | ||
| StringUtils.rightPad(cell, colWidths(i)) | ||
| } | ||
| }.addString(sb, "|", "|", "|\n") | ||
| } | ||
|
|
||
| sb.append(sep) | ||
| sb.append(sep) | ||
|
|
||
| // data | ||
| rows.tail.foreach { | ||
| _.zipWithIndex.map { case (cell, i) => | ||
| if (truncate > 0) { | ||
| StringUtils.leftPad(cell.toString, colWidths(i)) | ||
| } else { | ||
| StringUtils.rightPad(cell.toString, colWidths(i)) | ||
| } | ||
| }.addString(sb, "|", "|", "|\n") | ||
| } | ||
|
|
||
| sb.append(sep) | ||
| } else { | ||
| // Extended display mode enabled | ||
| val fieldNames = rows.head | ||
| val dataRows = rows.tail | ||
|
|
||
| // Compute the width of field name and data columns | ||
| val fieldNameColWidth = fieldNames.foldLeft(minimumColWidth) { case (curMax, fieldName) => | ||
| math.max(curMax, fieldName.length) | ||
| } | ||
| val dataColWidth = dataRows.foldLeft(minimumColWidth) { case (curMax, row) => | ||
| math.max(curMax, row.map(_.length).reduceLeftOption[Int] { case (cellMax, cell) => | ||
| math.max(cellMax, cell) | ||
| }.getOrElse(0)) | ||
| } | ||
|
|
||
| dataRows.zipWithIndex.foreach { case (row, i) => | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. As with the non-vertical output, when no rows exist, shouldn't we at least output the column names? For example: df.limit(0).show(20, 0, true)
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I checked and found that both PostgreSQL and MySQL output no column names in this mode.
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Currently, in this PR, we output nothing in this case. Postgres and MySQL at least output a message to indicate that the result set is empty. cc @cloud-fan @sameeragarwal @hvanhovell @rxin Any suggestions here?
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Aha, I see. I'll update. Thanks!
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. +1 for indicating that the result set is empty. As for displaying column names when the output is empty, perhaps it'd be best to stick with the Postgres/MySQL semantics. |
||
| // "+ 5" accounts for the five non-padded characters on each data line: the leading space, the " | " separator, and the trailing space | ||
| val rowHeader = StringUtils.rightPad( | ||
| s"-RECORD $i", fieldNameColWidth + dataColWidth + 5, "-") | ||
| sb.append(rowHeader).append("\n") | ||
| row.zipWithIndex.map { case (cell, j) => | ||
| val fieldName = StringUtils.rightPad(fieldNames(j), fieldNameColWidth) | ||
| val data = StringUtils.rightPad(cell, dataColWidth) | ||
| s" $fieldName | $data " | ||
| }.addString(sb, "", "\n", "\n") | ||
| } | ||
| } | ||
|
|
||
| // For Data that has more than "numRows" records | ||
| if (hasMoreData) { | ||
|
|
@@ -663,8 +695,54 @@ class Dataset[T] private[sql]( | |
| * @group action | ||
| * @since 1.6.0 | ||
| */ | ||
| def show(numRows: Int, truncate: Int): Unit = show(numRows, truncate, extendedMode = false) | ||
|
|
||
| /** | ||
| * Displays the Dataset in a tabular form. For example: | ||
| * {{{ | ||
| * year month AVG('Adj Close) MAX('Adj Close) | ||
| * 1980 12 0.503218 0.595103 | ||
| * 1981 01 0.523289 0.570307 | ||
| * 1982 02 0.436504 0.475256 | ||
| * 1983 03 0.410516 0.442194 | ||
| * 1984 04 0.450090 0.483521 | ||
| * }}} | ||
| * | ||
| * If `extendedMode` is enabled, this command prints each column value on its own line: | ||
| * {{{ | ||
| * -RECORD 0----------------- | ||
| * c0 | 0.6988392500990668 | ||
| * c1 | 0.3035961718851606 | ||
| * c2 | 0.2446213804275899 | ||
| * c3 | 0.6132556607194246 | ||
| * c4 | 0.1904412430355646 | ||
| * c5 | 0.8856600775630444 | ||
| * -RECORD 1----------------- | ||
| * c0 | 0.3942727621020799 | ||
| * c1 | 0.6501707200059537 | ||
| * c2 | 0.2550059028276454 | ||
| * c3 | 0.9806662488156962 | ||
| * c4 | 0.8533897091838063 | ||
| * c5 | 0.3911189623246518 | ||
| * -RECORD 2----------------- | ||
| * c0 | 0.9024183805969801 | ||
| * c1 | 0.0242018765375147 | ||
| * c2 | 0.8508820250344251 | ||
| * c3 | 0.4593368817024575 | ||
| * c4 | 0.2216918145613194 | ||
| * c5 | 0.3756882647319614 | ||
|
||
| * }}} | ||
| * | ||
| * @param numRows Number of rows to show | ||
| * @param truncate If set to more than 0, truncates strings to `truncate` characters and | ||
| * all cells will be aligned right. | ||
| * @param extendedMode Enable the expanded table formatting mode, which prints each column value on its own line. | ||
|
||
| * @group action | ||
| * @since 2.3.0 | ||
| */ | ||
| // scalastyle:off println | ||
| def show(numRows: Int, truncate: Int): Unit = println(showString(numRows, truncate)) | ||
| def show(numRows: Int, truncate: Int, extendedMode: Boolean): Unit = | ||
| println(showString(numRows, truncate, extendedMode)) | ||
| // scalastyle:on println | ||
|
|
||
| /** | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
extendedMode -> vertical? There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, SGTM. I'll update. Thanks!