23 commits
5384f7a  [SPARK-21213][SQL] Support collecting partition-level statistics: row…  (mbasmanova, Jun 12, 2017)
3ee5ebf  [SPARK-21213][SQL] review comments  (mbasmanova, Jun 28, 2017)
d17aa4b  [SPARK-21213][SQL] improved comments per review feedback  (mbasmanova, Jun 28, 2017)
e0e351e  [SPARK-21213][SQL] typo  (mbasmanova, Jun 28, 2017)
8dad9bc  [SPARK-21213][SQL] add support for partial partition specs  (mbasmanova, Jun 29, 2017)
4fdefd5  [SPARK-21213][SQL] add support for partition specs where some partiti…  (mbasmanova, Jun 29, 2017)
1d696c3  [SPARK-21213][SQL] comment update  (mbasmanova, Jun 29, 2017)
89c0767  [SPARK-21213][SQL] removed extra space  (mbasmanova, Jun 29, 2017)
7210568  [SPARK-21213][SQL] addressed easy review comments  (mbasmanova, Jul 5, 2017)
9aa2a1e  [SPARK-21213][SQL] addressed remaining review comments  (mbasmanova, Jul 5, 2017)
fa21860  [SPARK-21213][SQL] added test case for (ds, hr=11) partition spec  (mbasmanova, Jul 5, 2017)
f76f49f  [SPARK-21213][SQL] addressed review comments; fixed PARTITION (ds, hr…  (mbasmanova, Jul 11, 2017)
8f31f53  [SPARK-21213][SQL] shorted new test  (mbasmanova, Jul 11, 2017)
fae6d49  [SPARK-21213][SQL] added documentation; added test for an empty table  (mbasmanova, Jul 11, 2017)
8880fbd  [SPARK-21213][SQL] review comments  (mbasmanova, Jul 31, 2017)
1053991  [SPARK-21213][SQL] fixed bad merge of SPARK-21599  (mbasmanova, Aug 7, 2017)
41ab30d  [SPARK-21213][SQL] added support for spark.sql.caseSensitive; address…  (mbasmanova, Aug 8, 2017)
dc488e5  [SPARK-21213][SQL] addressed remaining review comments  (mbasmanova, Aug 8, 2017)
c839855  [SPARK-21213][SQL] Added a test for DESC PARTITION after ANALYZE; rev…  (mbasmanova, Aug 10, 2017)
72e2cd5  [SPARK-21213][SQL] added DROP TABLE to describe-part-after-analyze.sql  (mbasmanova, Aug 10, 2017)
87594d6  [SPARK-21213][SQL] check that partition columns in the partition spec…  (mbasmanova, Aug 17, 2017)
3353afa  [SPARK-21213][SQL] use PartitioningUtils.normalizePartitionSpec to ha…  (mbasmanova, Aug 18, 2017)
8ffb140  [SPARK-21213][SQL] review comments  (mbasmanova, Aug 18, 2017)
[SPARK-21213][SQL] addressed easy review comments
mbasmanova committed Aug 16, 2017
commit 7210568198e3b60ce3e255a1c8c5f46faa64b41f
@@ -110,11 +110,11 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) {

     val partitionSpec =
       if (ctx.partitionSpec != null) {
-        val filteredSpec = visitPartitionSpec(ctx.partitionSpec).filter(x => x._2.isDefined)
+        val filteredSpec = visitPartitionSpec(ctx.partitionSpec).filter(_._2.isDefined)
         if (filteredSpec.isEmpty) {
           None
         } else {
-          Some(filteredSpec.mapValues(v => v.get))
+          Some(filteredSpec.mapValues(_.get))
         }
       } else {
         None
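Reviewer note: `visitPartitionSpec` yields a `Map[String, Option[String]]` in which partition columns named without a value map to `None`; the filter-then-unwrap idiom above collapses that into an optional fully specified spec. A minimal standalone sketch of the same idiom (the `spec` value is made up for illustration, and `.toMap` is added here only to force a strict map):

    // Stand-in for the map produced by visitPartitionSpec; a value is None
    // when the PARTITION clause names a column without giving it a value.
    val spec: Map[String, Option[String]] = Map("ds" -> Some("2017-01-01"), "hr" -> None)

    // Keep only fully specified columns, then unwrap the Options.
    val filteredSpec = spec.filter(_._2.isDefined)
    val partitionSpec: Option[Map[String, String]] =
      if (filteredSpec.isEmpty) None else Some(filteredSpec.mapValues(_.get).toMap)
    // partitionSpec == Some(Map("ds" -> "2017-01-01"))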
@@ -77,8 +77,9 @@ case class AnalyzeTableCommand(
         calculateRowCountsPerPartition(sparkSession, tableMeta)
       }

-      partitions.foreach(p => {
-        val newTotalSize = CommandUtils.calculateTotalSize(sessionState, tableMeta, p)
+      partitions.foreach { p =>
+        val newTotalSize = CommandUtils.calculateLocationSize(sessionState,
+          tableMeta.identifier, p.storage.locationUri)
         val newRowCount = rowCounts.get(p.spec)

         def updateStats(newStats: CatalogStatistics): Unit = {
@@ -87,7 +88,7 @@ case class AnalyzeTableCommand(
         }

         calculateAndUpdateStats(p.stats, newTotalSize, newRowCount, updateStats)
-      })
+      }
     }

     Seq.empty[Row]
@@ -99,27 +100,22 @@ case class AnalyzeTableCommand(
     val filters = partitionSpec.get.map {
      case (columnName, value) => EqualTo(UnresolvedAttribute(columnName), Literal(value))
     }
-    val filter = filters match {
-      case head :: tail =>
-        if (tail.isEmpty) head
-        else tail.foldLeft(head: Expression)((a, b) => And(a, b))
-    }
+    val filter = filters.reduce(And)

-    val partitionColumns = tableMeta.partitionColumnNames.map(c => Column(c))
+    val partitionColumns = tableMeta.partitionColumnNames.map(Column(_))

     val df = sparkSession.table(tableMeta.identifier).filter(Column(filter))
       .groupBy(partitionColumns: _*).count()

     val numPartitionColumns = partitionColumns.size
-    val partitionColumnIndexes = 0 to (numPartitionColumns - 1)

-    df.collect().map(r => {
-      val partitionColumnValues = partitionColumnIndexes.map(i => r.get(i).toString)
+    df.collect().map { r =>
+      val partitionColumnValues = partitionColumns.indices.map(r.get(_).toString)
       val spec: TablePartitionSpec =
         tableMeta.partitionColumnNames.zip(partitionColumnValues).toMap
       val count = BigInt(r.getLong(numPartitionColumns))
       (spec, count)
-    }).toMap
+    }.toMap
   }

   private def calculateAndUpdateStats(
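Reviewer note: `filters.reduce(And)` works because a case-class companion like `And` is itself a `(Expression, Expression) => Expression` function, so `reduce` builds the same left-nested conjunction the removed `foldLeft` did; `reduce` throws on an empty collection, which is safe here since the partition spec is guaranteed non-empty. A small self-contained sketch with simplified stand-ins for the Catalyst types:

    // Illustrative stand-ins; not Spark's Catalyst classes.
    sealed trait Expr
    case class EqualTo(column: String, value: String) extends Expr
    case class And(left: Expr, right: Expr) extends Expr

    val filters = Seq[Expr](
      EqualTo("ds", "2017-01-01"),
      EqualTo("hr", "10"),
      EqualTo("min", "59"))

    // The companion And is a Function2, so it can be passed to reduce directly.
    // Result: And(And(EqualTo(ds, ...), EqualTo(hr, ...)), EqualTo(min, ...))
    val filter: Expr = filters.reduce(And)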
@@ -59,13 +59,6 @@ object CommandUtils extends Logging {
     }
   }

-  def calculateTotalSize(
-      sessionState: SessionState,
-      catalogTable: CatalogTable,
-      partition: CatalogTablePartition): Long = {
-    calculateLocationSize(sessionState, catalogTable.identifier, partition.storage.locationUri)
-  }
-
   def calculateLocationSize(
       sessionState: SessionState,
       identifier: TableIdentifier,
@@ -646,7 +646,7 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configuration)
     // convert table statistics to properties so that we can persist them through hive client
     var statsProperties =
       if (stats.isDefined) {
-        statsToHiveProperties(stats.get, rawTable.schema)
+        statsToProperties(stats.get, rawTable.schema)
       } else {
         new mutable.HashMap[String, String]()
       }
@@ -696,7 +696,7 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configuration)

     // Restore Spark's statistics from information in Metastore.
     val restoredStats =
-      statsFromHiveProperties(table.properties, table.identifier.table, table.schema)
+      statsFromProperties(table.properties, table.identifier.table, table.schema)
     if (restoredStats.isDefined) {
       table = table.copy(stats = restoredStats)
     }
@@ -1002,7 +1002,7 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configuration)
     currentFullPath
   }

-  private def statsToHiveProperties(
+  private def statsToProperties(
       stats: CatalogStatistics,
       schema: StructType): Map[String, String] = {

@@ -1023,7 +1023,7 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configuration)
     statsProperties
   }

-  private def statsFromHiveProperties(
+  private def statsFromProperties(
       properties: Map[String, String],
       table: String,
       schema: StructType): Option[CatalogStatistics] = {
@@ -1075,7 +1075,7 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configuration)
     // convert partition statistics to properties so that we can persist them through hive api
     val withStatsProps = lowerCasedParts.map(p => {
       if (p.stats.isDefined) {
-        val statsProperties = statsToHiveProperties(p.stats.get, rawTable.schema)
+        val statsProperties = statsToProperties(p.stats.get, rawTable.schema)
         p.copy(parameters = p.parameters ++ statsProperties)
       } else {
         p
@@ -1105,7 +1105,7 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configuration)

     // construct Spark's statistics from information in Hive metastore
     val restoredStats =
-      statsFromHiveProperties(partition.parameters, table.identifier.table, table.schema)
+      statsFromProperties(partition.parameters, table.identifier.table, table.schema)
     if (restoredStats.isDefined) {
       partition.copy(
         spec = restoredSpec,
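Reviewer note: the renamed `statsToProperties`/`statsFromProperties` pair (nothing about them is Hive-specific, hence dropping "Hive" from the names) serializes `CatalogStatistics` into flat string key-value properties the metastore client can persist, and rebuilds the statistics on the way back out. A rough sketch of that round-trip idea under simplified types; the key names follow the `spark.sql.statistics.*` convention but the code and keys here are illustrative, not the catalog's actual implementation:

    // Simplified model of the stats round-trip; not Spark's exact code or keys.
    case class Stats(sizeInBytes: BigInt, rowCount: Option[BigInt])

    def statsToProperties(stats: Stats): Map[String, String] = {
      val base = Map("spark.sql.statistics.totalSize" -> stats.sizeInBytes.toString)
      stats.rowCount.fold(base)(n => base + ("spark.sql.statistics.numRows" -> n.toString))
    }

    def statsFromProperties(props: Map[String, String]): Option[Stats] =
      props.get("spark.sql.statistics.totalSize").map { size =>
        Stats(BigInt(size), props.get("spark.sql.statistics.numRows").map(BigInt(_)))
      }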
@@ -279,9 +279,9 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleton
       """.stripMargin)
     sql(s"INSERT INTO TABLE $tableName PARTITION (ds='2010-01-03') SELECT * FROM src")

-    sql(s"ANALYZE TABLE $tableName PARTITION (ds='2010-01-01') COMPUTE STATISTICS").collect()
+    sql(s"ANALYZE TABLE $tableName PARTITION (ds='2010-01-01') COMPUTE STATISTICS")

-    sql(s"ANALYZE TABLE $tableName PARTITION (ds='2010-01-02') COMPUTE STATISTICS").collect()
+    sql(s"ANALYZE TABLE $tableName PARTITION (ds='2010-01-02') COMPUTE STATISTICS")

     assert(queryStats("2010-01-01").rowCount.get === 500)
     assert(queryStats("2010-01-01").sizeInBytes === 5812)
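Reviewer note on dropping the trailing `.collect()` calls: `sql(...)` executes command plans such as ANALYZE TABLE eagerly, so the statement has already run by the time the DataFrame is returned; only query plans stay lazy. A sketch of the distinction, reusing the test's `sql` and `tableName` scope:

    // Eager: the command runs inside sql() itself; no action is needed.
    sql(s"ANALYZE TABLE $tableName PARTITION (ds='2010-01-01') COMPUTE STATISTICS")

    // Lazy by contrast: a query needs an action like collect() to execute.
    val df = sql(s"SELECT COUNT(*) FROM $tableName WHERE ds = '2010-01-01'")
    df.collect()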