@@ -21,13 +21,15 @@ import scala.concurrent.{ExecutionContext, Future}
 import scala.concurrent.duration._
 
 import org.apache.spark.{broadcast, SparkException}
+import org.apache.spark.launcher.SparkLauncher
 import org.apache.spark.rdd.RDD
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.expressions.UnsafeRow
 import org.apache.spark.sql.catalyst.plans.physical.{BroadcastMode, BroadcastPartitioning, Partitioning}
 import org.apache.spark.sql.execution.{SparkPlan, SQLExecution}
 import org.apache.spark.sql.execution.metric.SQLMetrics
 import org.apache.spark.sql.execution.ui.SparkListenerDriverAccumUpdates
+import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.util.ThreadUtils
 
 /**
@@ -70,6 +72,7 @@ case class BroadcastExchangeExec(
       // This will run in another thread. Set the execution id so that we can connect these jobs
       // with the correct execution.
       SQLExecution.withExecutionId(sparkContext, executionId) {
+        try {
           val beforeCollect = System.nanoTime()
           // Note that we use .executeCollect() because we don't want to convert data to Scala types
           val input: Array[InternalRow] = child.executeCollect()
@@ -102,6 +105,14 @@ }
           }
 
           broadcasted
+        } catch {
+          case oe: OutOfMemoryError =>
+            throw new OutOfMemoryError(s"Not enough memory to build and broadcast the table to " +
+              s"all worker nodes. As a workaround, you can either disable broadcast by setting " +
+              s"${SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key} to -1 or increase the spark driver " +
+              s"memory by setting ${SparkLauncher.DRIVER_MEMORY} to a higher value")
+              .initCause(oe.getCause)
+        }
       }
     }(BroadcastExchangeExec.executionContext)
   }
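
The comment in the second hunk explains why the body sits inside SQLExecution.withExecutionId: the collect runs on a thread from BroadcastExchangeExec.executionContext, not on the thread that started the SQL execution, so the execution id has to be re-installed on the pool thread for the broadcast jobs to be attributed to the right execution. Below is a minimal sketch of that propagation idea only; the names (currentId, withId, submit) are hypothetical and not Spark API.

import scala.concurrent.{ExecutionContext, Future}

object ExecutionIdPropagation {
  // Thread-local slot standing in for the driver-side "current execution id".
  private val currentId = new ThreadLocal[String]

  // Install an id for the duration of `body`, restoring the previous value after.
  def withId[T](id: String)(body: => T): T = {
    val saved = currentId.get()
    currentId.set(id)
    try body finally currentId.set(saved)
  }

  // Work scheduled on a pool thread would otherwise see currentId.get() == null,
  // so the id captured on the caller's thread is re-installed inside the Future.
  def submit[T](id: String)(work: => T)(implicit ec: ExecutionContext): Future[T] =
    Future { withId(id) { work } }
}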
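The new catch block is the substance of the change: a bare OutOfMemoryError from the driver-side collect says nothing about which knob to turn, so it is replaced by one whose message names the two relevant settings, while initCause keeps the original cause chain attached. A standalone sketch of the same pattern, with collectRows standing in for the real child.executeCollect() call:

object OomRethrowSketch {
  def buildAndBroadcast[T](collectRows: () => T): T = {
    try {
      collectRows()
    } catch {
      case oe: OutOfMemoryError =>
        // Re-throw with actionable guidance; the two keys below are what
        // SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key and SparkLauncher.DRIVER_MEMORY
        // resolve to in the diff above.
        throw new OutOfMemoryError("Not enough memory to build and broadcast the table to " +
          "all worker nodes. As a workaround, you can either disable broadcast by setting " +
          "spark.sql.autoBroadcastJoinThreshold to -1 or increase the spark driver " +
          "memory by setting spark.driver.memory to a higher value")
          .initCause(oe.getCause)
    }
  }
}

As for the workaround the message suggests: the broadcast threshold can be changed on a live session, while driver memory must be set before the driver JVM starts, so it belongs on the launch command. Illustrative values:

spark.conf.set("spark.sql.autoBroadcastJoinThreshold", -1)
spark-submit --conf spark.driver.memory=8g ...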